Date: 2021_03_19
Analyzed by: Sara Saez Atienzar
Mendelian randomization is an analytical method that uses genetic information, in the form of genetic variants (instruments) to infer causal relationships between two traits/phenotypes (known as exposure and outcome, in this case we use MG as outcome). For a good description of the method, see https://elifesciences.org/articles/34408
Important links that will help you to understand the analysis and results can be found in here:
Perform Mendelian randomization analysis to identify possible causal relationship between traits in ieu-a, ebi-a and ukb-a datasets and MG
Jupyter notebook containing the script can be found in here:
%%bash
pwd
#mkdir MR_Base
cd MR_Base
#mkdir Plots
#mkdir Results
#mkdir Data
#mkdir Scripts
#mkdir Swarms
#mkdir Swarms_oe
ls
/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG Data Plots Results Rplots.pdf Scripts Swarms Swarms_oe
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data
echo "id
ieu-a-1016
ieu-a-1081
ieu-a-1106
ieu-a-1124
ieu-a-1186
ieu-a-819" > failed.ieu_a_batch.txt
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Scripts
echo "R script ieu traits"
cat ieu-a.R
echo "ieu-a.sh"
cat ieu-a.sh
sbatch --partition=largemem --mem=400g --cpus-per-task=8 --gres=lscratch:100 --time=12:00:00 ieu-a.sh
R script ieu traits
library(data.table)
library(TwoSampleMR)
#install.packages("MRinstruments")
library(MRInstruments)
#install.packages("WSpiller/RadialMR")
library("RadialMR")
library("ieugwasr")
sessionInfo()
MGtemp = fread("/data/NDRS_LNG/MyastheniaGravis/updated.April2020/MetaAnalysis.hg38.ByCohort.rerun/USmerged.Itals.hg38/META_MG.USmerged.Itals.UNRELATED.hg38.Rsq03.glm_filteredDirection.HetISq80MAF001cases.rsid.txt", header = T)
head(MGtemp)
MGtemp$SNP = MGtemp$rsID
MGtemp$Freq1 = MGtemp$maf_EA.CASE
MGtemp$effect_allele = MGtemp$EffectAllele
MGtemp$other_allele = MGtemp$OtherAllele
MGtemp$Beta = MGtemp$beta
MGtemp$P.value = MGtemp$P
Out_data = format_data(MGtemp,type="outcome", beta_col = "Beta", se_col = "StdErr", eaf_col = "Freq1", pval_col = "P.value")
#the token will be different each time, it needs to be prepare at the moment
token <- "ya29.a0AfH6SMBRL6s5n8eH5plgY38OYik3hb6v5RwqJJrKCVqPlRfZWRhuR24IN7X3m7wmSIRaLVeS16-_JFOIzqDFa46UNXrB9tsNozuOjkjYxxRNNNVs7OEOnekORP1uIbDhTSi9wUFDLOlMgjsz-daSjzsKnas5fg"
#create a list with the GWAS id, this can be obtained from the ieu OpenGwas Project (https://gwas.mrcieu.ac.uk)
listOfGwasIds <- read.table("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data/ieu_a_batch.badremoved.txt", header = T)
for(i in 1:334)
{
instrumentId <- as.character(listOfGwasIds$id[i])
tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
print(tag)
flagged <- "nope"
Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
r2 = 0.001, kb = 10000, access_token = token,
force_server = TRUE)
skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
if(skip == 0)
{
dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
res <-mr(dat)
print(res)
het <- mr_heterogeneity(dat)
print(het)
ple <- mr_pleiotropy_test(dat)
print(ple)
write.table(res, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ieu-a.DB/",instrumentId,"_ieu_a_res.txt",sep = ""), quote = F, sep = ",")
write.table(het, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ieu-a.DB/",instrumentId,"_ieu_a_het.txt",sep = ""), quote = F, sep = ",")
write.table(ple, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ieu-a.DB/",instrumentId,"_ieu_a_ple.txt",sep = ""), quote = F, sep = ",")
res_single <- mr_singlesnp(dat)
p2 <- mr_forest_plot(res_single)
p <- mr_scatter_plot(res, dat)
write.table(res_single, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ieu-a.DB/",instrumentId,"__ieu_a_single.txt",sep = ""), quote = F, sep = ",")
}
else
{
print("FAIL")
}
}
ieu-a.sh
#!/bin/bash
module load R/4.0.3
Rscript /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Scripts/ieu-a.R
11027438
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data
echo "id
30 ebi-a-GCST003368
182 ebi-a-GCST005540
183 ebi-a-GCST0055439" > failed.ebi_a_batch.txt
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Scripts
echo "R script ebi traits"
cat ebi-a.R
echo "ebi-a.sh"
cat ebi-a.sh
sbatch --partition=largemem --mem=400g --cpus-per-task=8 --gres=lscratch:100 --time=12:00:00 ebi-a.sh
R script ebi traits
library(data.table)
library(TwoSampleMR)
#install.packages("MRinstruments")
library(MRInstruments)
#install.packages("WSpiller/RadialMR")
library("RadialMR")
library("ieugwasr")
sessionInfo()
MGtemp = fread("/data/NDRS_LNG/MyastheniaGravis/updated.April2020/MetaAnalysis.hg38.ByCohort.rerun/USmerged.Itals.hg38/META_MG.USmerged.Itals.UNRELATED.hg38.Rsq03.glm_filteredDirection.HetISq80MAF001cases.rsid.txt", header = T)
head(MGtemp)
MGtemp$SNP = MGtemp$rsID
MGtemp$Freq1 = MGtemp$maf_EA.CASE
MGtemp$effect_allele = MGtemp$EffectAllele
MGtemp$other_allele = MGtemp$OtherAllele
MGtemp$Beta = MGtemp$beta
MGtemp$P.value = MGtemp$P
Out_data = format_data(MGtemp,type="outcome", beta_col = "Beta", se_col = "StdErr", eaf_col = "Freq1", pval_col = "P.value")
#the token will be different each time, it needs to be prepare at the moment
token <-"ya29.a0AfH6SMAl_dbCwIzoGxFNGB512Wg5B5cnq4_XB1pUAERmRUY7fWp5BY0vvMiNr5akEmwkMumzkzT2Yk7uyKQXzshsL7Kp4TRPlOa6ZlZdl1HjA26T5pc7TiOyUuu8jIvRNfZ9jU3j5fRUvRGoFpSi0DHtLNyFmQ"
#create a list with the GWAS id, this can be obtained from the ieu OpenGwas Project (https://gwas.mrcieu.ac.uk)
listOfGwasIds <- read.table("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data/ebi_a_batch.badremoved.txt", header = T)
for(i in 1:286)
{
instrumentId <- as.character(listOfGwasIds$id[i])
tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
print(tag)
flagged <- "nope"
Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
r2 = 0.001, kb = 10000, access_token = token,
force_server = TRUE)
skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
if(skip == 0)
{
dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
res <-mr(dat)
print(res)
het <- mr_heterogeneity(dat)
print(het)
ple <- mr_pleiotropy_test(dat)
print(ple)
write.table(res, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ebi-a.DB/",instrumentId,"_ebi_a_res.txt",sep = ""), quote = F, sep = ",")
write.table(het, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ebi-a.DB/",instrumentId,"_ebi_a_het.txt",sep = ""), quote = F, sep = ",")
write.table(ple, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ebi-a.DB/",instrumentId,"_ebi_a_ple.txt",sep = ""), quote = F, sep = ",")
res_single <- mr_singlesnp(dat)
p2 <- mr_forest_plot(res_single)
p <- mr_scatter_plot(res, dat)
write.table(res_single, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ebi-a.DB/",instrumentId,"__ebi_a_single.txt",sep = ""), quote = F, sep = ",")
}
else
{
print("FAIL")
}
}
ebi-a.sh
#!/bin/bash
module load R/4.0.3
Rscript /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Scripts/ebi-a.R
11070388
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data
echo "id
ukb-a-130
ukb-a-167
ukb-a-171
ukb-a-256
ukb-a-372
ukb-a-411
ukb-a-430
ukb-a-509
ukb-a-526
ukb-a-527
ukb-a-554
ukb-a-555
ukb-a-556
ukb-a-571
ukb-a-583
ukb-a-67
ukb-a-73
ukb-a-83" > failed.ukb_a_batch.txt
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Scripts
echo "R script ukb-a traits"
cat ukb-a.R
echo "ukb-a.sh"
cat ukb-a.sh
sbatch --partition=largemem --mem=400g --cpus-per-task=8 --gres=lscratch:100 --time=12:00:00 ukb-a.sh
R script ukb-a traits
library(data.table)
library(TwoSampleMR)
#install.packages("MRinstruments")
library(MRInstruments)
#install.packages("WSpiller/RadialMR")
library("RadialMR")
library("ieugwasr")
sessionInfo()
MGtemp = fread("/data/NDRS_LNG/MyastheniaGravis/updated.April2020/MetaAnalysis.hg38.ByCohort.rerun/USmerged.Itals.hg38/META_MG.USmerged.Itals.UNRELATED.hg38.Rsq03.glm_filteredDirection.HetISq80MAF001cases.rsid.txt", header = T)
head(MGtemp)
MGtemp$SNP = MGtemp$rsID
MGtemp$Freq1 = MGtemp$maf_EA.CASE
MGtemp$effect_allele = MGtemp$EffectAllele
MGtemp$other_allele = MGtemp$OtherAllele
MGtemp$Beta = MGtemp$beta
MGtemp$P.value = MGtemp$P
Out_data = format_data(MGtemp,type="outcome", beta_col = "Beta", se_col = "StdErr", eaf_col = "Freq1", pval_col = "P.value")
#the token will be different each time, it needs to be prepare at the moment
token <- "ya29.a0AfH6SMBRL6s5n8eH5plgY38OYik3hb6v5RwqJJrKCVqPlRfZWRhuR24IN7X3m7wmSIRaLVeS16-_JFOIzqDFa46UNXrB9tsNozuOjkjYxxRNNNVs7OEOnekORP1uIbDhTSi9wUFDLOlMgjsz-daSjzsKnas5fg"
#create a list with the GWAS id, this can be obtained from the ieu OpenGwas Project (https://gwas.mrcieu.ac.uk)
listOfGwasIds <- read.table("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data/failed.ukb-a_batch.txt", header = T)
for(i in 1:600)
{
instrumentId <- as.character(listOfGwasIds$id[i])
tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
print(tag)
flagged <- "nope"
Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
r2 = 0.001, kb = 10000, access_token = token,
force_server = TRUE)
skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
if(skip == 0)
{
dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
res <-mr(dat)
print(res)
het <- mr_heterogeneity(dat)
print(het)
ple <- mr_pleiotropy_test(dat)
print(ple)
write.table(res, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ebi-a.DB/",instrumentId,"_ebi_a_res.txt",sep = ""), quote = F, sep = ",")
write.table(het, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ebi-a.DB/",instrumentId,"_ebi_a_het.txt",sep = ""), quote = F, sep = ",")
write.table(ple, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ebi-a.DB/",instrumentId,"_ebi_a_ple.txt",sep = ""), quote = F, sep = ",")
res_single <- mr_singlesnp(dat)
p2 <- mr_forest_plot(res_single)
p <- mr_scatter_plot(res, dat)
write.table(res_single, file = paste("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ebi-a.DB/",instrumentId,"__ebi_a_single.txt",sep = ""), quote = F, sep = ",")
}
else
{
print("FAIL")
}
}
ukb-a.sh
#!/bin/bash
module load R/4.0.3
Rscript /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Scripts/ukb-a.R
11121773
Select the traits with IVW FDR-corrected p-value < 0.05 and Weighted median and MR Egger row pvalue < 0.05. For the selected traits, get pleitropy and heterogeneity results too. Only two traits are selected for follow-up analysis:
import numpy as np
import pandas as pd
res = pd.read_csv("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ukb-a-ALL.clean.res.csv",sep=",")
res
| id.exposure | id.outcome | outcome | exposure | method | nsnp | b | se | pval | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | ukb-a-77 | hOPa3X | outcome | Non-cancer illness code self-reported: hypoth... | Inverse variance weighted | 67 | 9.061093 | 1.643453 | 3.520000e-08 |
| 1 | ukb-a-190 | hOPa3X | outcome | Treatment/medication code: levothyroxine sodiu... | Inverse variance weighted | 47 | 10.934091 | 2.381289 | 4.400000e-06 |
| 2 | ukb-a-77 | hOPa3X | outcome | Non-cancer illness code self-reported: hypoth... | Weighted median | 67 | 8.954881 | 1.983226 | 6.320000e-06 |
| 3 | ukb-a-291 | hOPa3X | outcome | Trunk fat mass || id:ukb-a-291 | Inverse variance weighted | 254 | 0.514032 | 0.135833 | 1.541410e-04 |
| 4 | ukb-a-587 | hOPa3X | outcome | Diagnoses - main ICD10: R55 Syncope and collap... | Wald ratio | 1 | 106.200977 | 28.199440 | 1.658390e-04 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1764 | ukb-a-320 | hOPa3X | outcome | Age at last live birth || id:ukb-a-320 | Weighted mode | 3 | -0.003447 | 1.276927 | 9.980914e-01 |
| 1765 | ukb-a-320 | hOPa3X | outcome | Age at last live birth || id:ukb-a-320 | Simple mode | 3 | -0.003447 | 1.298070 | 9.981225e-01 |
| 1766 | ukb-a-379 | hOPa3X | outcome | Hand grip strength (right) || id:ukb-a-379 | Inverse variance weighted | 89 | -0.000872 | 0.463789 | 9.984991e-01 |
| 1767 | ukb-a-537 | hOPa3X | outcome | Diagnoses - main ICD10: I80 Phlebitis and thro... | Weighted median | 6 | -0.013610 | 8.395336 | 9.987065e-01 |
| 1768 | ukb-a-548 | hOPa3X | outcome | Diagnoses - main ICD10: K35 Acute appendicitis... | Wald ratio | 1 | 0.000000 | 28.737609 | 1.000000e+00 |
1769 rows × 9 columns
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ukb-a.DB
cat *_ukb_a.ALL.res.txt > ukb_a.ALL.res.txt
cat *_ukb_a.ALL.het.txt > ukb_a.ALL.het.txt
cat *_ukb_a.ALL.ple.txt > ukb_a.ALL.ple.txt
#open in excel and remove multiples headers, the new clean files will be in cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results
# ukb-a-ALL.clean.res.csv
# ukb-a-ALL.clean.het.csv
# ukb-a-ALL.clean.ple.csv
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results
ls *ukb-a-ALL.clean.*
module load R/4.0.3
R --vanilla --no-save
# Load libraries
library(data.table)
library(tidyverse)
library(dplyr)
#open clean ukb-a-ALL file witht the results
data = fread("ukb-a-ALL.clean.res.csv", header = T)
head(data)
#used methods
table(data$method)
exposures <- unique(data$exposure)
numberOfexposures <- length(exposures)
#how many exposures do we have
numberOfexposures
#filter using IVW
IVW = filter(data, method == "Inverse variance weighted")
dim(IVW)
head(IVW)
Padj.fdr = p.adjust(IVW$pval, method = "fdr")
table(Padj.fdr)
temp = cbind(IVW, Padj.fdr)
head(temp)
temp2 <- temp[order(Padj.fdr),]
head(temp2)
#save FDR corrected IVW results
write.csv(temp2, "ukb-a.IVW.FDR.ALL.csv")
# Select the traits with MR Egger and Median weight significant too.
sigIVW = filter(temp2,Padj.fdr < 0.05)
dim(sigIVW)
sigMREgger = filter(data, method == "MR Egger")
sigMREgger = filter(sigMREgger,pval < 0.05)
dim(sigMREgger)
sigWeightedmedian = filter(data, method == "Weighted median")
sigWeightedmedian = filter(sigWeightedmedian,pval < 0.05)
dim(sigWeightedmedian)
merged = merge(sigIVW, sigMREgger, by = "id.exposure")
dim(merged)
merged2 = merge(merged, sigWeightedmedian, by = "id.exposure")
dim(merged)
toprint = merged2
id.exposure = toprint$id.exposure
id.exposure
#ukb-a-190
#ukb-a-77
sigIVW = filter(sigIVW, id.exposure == "ukb-a-190" | id.exposure == "ukb-a-77")
sigMREgger = filter(sigMREgger, id.exposure == "ukb-a-190" | id.exposure == "ukb-a-77")
sigWeightedmedian = filter(sigWeightedmedian, id.exposure == "ukb-a-190" | id.exposure == "ukb-a-77")
binded = rbind(sigIVW, sigMREgger, fill=TRUE)
binded2 = rbind(binded, sigWeightedmedian, fill=TRUE)
#save results with the significant traits for follow up
write.table(binded2, "RESULTS.ukb-a.txt", quote = F, sep = ",", row.names = F)
#open clean ple results file
ple = fread("ukb-a-ALL.clean.ple.csv", header=T)
ple = filter(ple, id.exposure == "ukb-a-190"| id.exposure == "ukb-a-77")
head(ple)
write.table(ple, "PLEITROPY.ukb-a.txt", quote = F, sep = ",", row.names = F)
#open clean het results file
het = fread("ukb-a-ALL.clean.het.csv", header=T)
het = filter(het, id.exposure == "ukb-a-190"| id.exposure == "ukb-a-77")
head(het)
write.table(het, "HETEROGENEITY.ukb-a.txt", quote = F, sep = ",", row.names = F)
ukb-a-ALL.clean.het.csv
ukb-a-ALL.clean.ple.csv
ukb-a-ALL.clean.res.csv
R version 4.0.3 (2020-10-10) -- "Bunny-Wunnies Freak Out"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> # Load libraries
> library(data.table)
> library(tidyverse)
> library(dplyr)
> #open clean ukb-a-ALL file witht the results
> data = fread("ukb-a-ALL.clean.res.csv", header = T)
> head(data)
id.exposure id.outcome outcome
1: ukb-a-77 hOPa3X outcome
2: ukb-a-190 hOPa3X outcome
3: ukb-a-77 hOPa3X outcome
4: ukb-a-291 hOPa3X outcome
5: ukb-a-587 hOPa3X outcome
6: ukb-a-203 hOPa3X outcome
exposure
1: Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
2: Treatment/medication code: levothyroxine sodium || id:ukb-a-190
3: Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
4: Trunk fat mass || id:ukb-a-291
5: Diagnoses - main ICD10: R55 Syncope and collapse || id:ukb-a-587
6: Illnesses of father: None of the above (group 2) || id:ukb-a-203
method nsnp b se pval
1: Inverse variance weighted 67 9.0610935 1.6434530 3.52000e-08
2: Inverse variance weighted 47 10.9340910 2.3812886 4.40000e-06
3: Weighted median 67 8.9548814 1.9832261 6.32000e-06
4: Inverse variance weighted 254 0.5140323 0.1358328 1.54141e-04
5: Wald ratio 1 106.2009772 28.1994404 1.65839e-04
6: Wald ratio 1 -18.3541288 5.1053724 3.24315e-04
> #used methods
> table(data$method)
Inverse variance weighted MR Egger Simple mode
376 325 325
Wald ratio Weighted median Weighted mode
93 325 325
> exposures <- unique(data$exposure)
> numberOfexposures <- length(exposures)
>
> #how many exposures do we have
> numberOfexposures
[1] 469
>
> #filter using IVW
> IVW = filter(data, method == "Inverse variance weighted")
> dim(IVW)
[1] 376 9
> head(IVW)
id.exposure id.outcome outcome
1: ukb-a-77 hOPa3X outcome
2: ukb-a-190 hOPa3X outcome
3: ukb-a-291 hOPa3X outcome
4: ukb-a-265 hOPa3X outcome
5: ukb-a-275 hOPa3X outcome
6: ukb-a-101 hOPa3X outcome
exposure
1: Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
2: Treatment/medication code: levothyroxine sodium || id:ukb-a-190
3: Trunk fat mass || id:ukb-a-291
4: Whole body fat mass || id:ukb-a-265
5: Leg fat mass (right) || id:ukb-a-275
6: Non-cancer illness code self-reported: malabsorption/coeliac disease || id:ukb-a-101
method nsnp b se pval
1: Inverse variance weighted 67 9.0610935 1.6434530 3.52000e-08
2: Inverse variance weighted 47 10.9340910 2.3812886 4.40000e-06
3: Inverse variance weighted 254 0.5140323 0.1358328 1.54141e-04
4: Inverse variance weighted 248 0.5009594 0.1424220 4.35749e-04
5: Inverse variance weighted 249 0.6065149 0.1729081 4.51947e-04
6: Inverse variance weighted 13 17.3617767 5.0005616 5.16663e-04
> Padj.fdr = p.adjust(IVW$pval, method = "fdr")
> table(Padj.fdr)
Padj.fdr
1.32352e-05 0.0008272 0.0193190053333333 0.031226095
1 1 1 5
0.0333230835555556 0.0365273472 0.0498662821818182 0.0579971226666667
1 1 1 1
0.0595927895384615 0.065442424 0.0738963829333333 0.0745148105
1 1 1 1
0.0857786936470588 0.0891238143157895 0.0900989624 0.0917402579047619
1 2 1 1
0.114120306909091 0.128454822666667 0.19020878944 0.196952237714286
1 2 1 3
0.201271386758621 0.214209261935484 0.217759883 0.234693024484848
1 2 1 1
0.234700781411765 0.246301184914286 0.26792083425641 0.303769831822222
1 1 4 6
0.326397704 0.341574984 0.3505515915 0.356864178285714
1 1 1 1
0.36159227408 0.362504416313725 0.371744171692308 0.372110949963636
1 1 1 3
0.374452914428571 0.375463285931034 0.382224265762712 0.3895584472
1 2 1 1
0.392518900459016 0.416929736634921 0.4428623175 0.491455358276923
1 2 1 1
0.496607919151515 0.521951893767442 0.527418722956522 0.529665588301075
1 20 6 1
0.532864540865979 0.534428792565657 0.542128727762376 0.543626885359223
4 2 2 2
0.548754949846154 0.551806953828571 0.564256059514019 0.579190712778761
1 1 2 6
0.586526977964912 0.602873145808696 0.603701113931034 0.613821545777778
1 1 1 1
0.624387594711864 0.639385658689076 0.640130649719008 0.643224220131148
1 1 2 1
0.651652977889764 0.652602791069767 0.654944466137405 0.658085497272727
5 2 2 1
0.691994422647059 0.6940263272 0.701243405333333 0.702443700450704
4 4 1 1
0.704565683188811 0.711804513205479 0.718037060789116 0.71891012252349
1 3 1 2
0.721168856582782 0.74879546187013 0.755164915716129 0.776069312205128
2 3 1 1
0.791223491363057 0.794052043371069 0.833031729639752 0.836653281276074
1 2 2 2
0.841542355087719 0.844893234309278 0.846420294728205 0.848290361487437
8 23 1 4
0.84960893072 0.85295192760199 0.858097313411765 0.861030517463415
1 1 3 1
0.865046594504673 0.865782736925926 0.871975625585254 0.875212938825688
9 2 1 1
0.875423234447489 0.878850421714286 0.879529246862222 0.885710191157895
1 5 1 3
0.891884939304348 0.892360236 0.8939733056 0.910457294866397
2 2 3 12
0.916636835193548 0.92046181384739 0.921112375456 0.922764641936508
1 1 1 2
0.922880306992126 0.925957856815686 0.92674227525 0.927678058645914
2 1 1 1
0.928028503937984 0.928955441328185 0.935006602066421 0.936745363352941
1 1 12 1
0.937417576908425 0.939179241221818 0.956546756890459 0.958696226441558
1 2 8 25
0.960912314459547 0.96106570829582 0.966977233538462 0.968460906760383
1 2 1 1
0.97037940645 0.970794692192771 0.976414617657658 0.976882343209581
7 12 1 1
0.978457596513433 0.978807737738095 0.979047544542773 0.979494569725947
1 1 3 4
0.982096661604651 0.983944469552408 0.986500924426966 0.989204560267409
1 9 3 3
0.989276688355556 0.989523814072022 0.993021834659401 0.99849912
1 1 6 9
> temp = cbind(IVW, Padj.fdr)
> head(temp)
id.exposure id.outcome outcome
1: ukb-a-77 hOPa3X outcome
2: ukb-a-190 hOPa3X outcome
3: ukb-a-291 hOPa3X outcome
4: ukb-a-265 hOPa3X outcome
5: ukb-a-275 hOPa3X outcome
6: ukb-a-101 hOPa3X outcome
exposure
1: Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
2: Treatment/medication code: levothyroxine sodium || id:ukb-a-190
3: Trunk fat mass || id:ukb-a-291
4: Whole body fat mass || id:ukb-a-265
5: Leg fat mass (right) || id:ukb-a-275
6: Non-cancer illness code self-reported: malabsorption/coeliac disease || id:ukb-a-101
method nsnp b se pval Padj.fdr
1: Inverse variance weighted 67 9.0610935 1.6434530 3.52000e-08 0.0000132352
2: Inverse variance weighted 47 10.9340910 2.3812886 4.40000e-06 0.0008272000
3: Inverse variance weighted 254 0.5140323 0.1358328 1.54141e-04 0.0193190053
4: Inverse variance weighted 248 0.5009594 0.1424220 4.35749e-04 0.0312260950
5: Inverse variance weighted 249 0.6065149 0.1729081 4.51947e-04 0.0312260950
6: Inverse variance weighted 13 17.3617767 5.0005616 5.16663e-04 0.0312260950
> temp2 <- temp[order(Padj.fdr),]
> head(temp2)
id.exposure id.outcome outcome
1: ukb-a-77 hOPa3X outcome
2: ukb-a-190 hOPa3X outcome
3: ukb-a-291 hOPa3X outcome
4: ukb-a-265 hOPa3X outcome
5: ukb-a-275 hOPa3X outcome
6: ukb-a-101 hOPa3X outcome
exposure
1: Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
2: Treatment/medication code: levothyroxine sodium || id:ukb-a-190
3: Trunk fat mass || id:ukb-a-291
4: Whole body fat mass || id:ukb-a-265
5: Leg fat mass (right) || id:ukb-a-275
6: Non-cancer illness code self-reported: malabsorption/coeliac disease || id:ukb-a-101
method nsnp b se pval Padj.fdr
1: Inverse variance weighted 67 9.0610935 1.6434530 3.52000e-08 0.0000132352
2: Inverse variance weighted 47 10.9340910 2.3812886 4.40000e-06 0.0008272000
3: Inverse variance weighted 254 0.5140323 0.1358328 1.54141e-04 0.0193190053
4: Inverse variance weighted 248 0.5009594 0.1424220 4.35749e-04 0.0312260950
5: Inverse variance weighted 249 0.6065149 0.1729081 4.51947e-04 0.0312260950
6: Inverse variance weighted 13 17.3617767 5.0005616 5.16663e-04 0.0312260950
> #save FDR corrected IVW results
> write.csv(temp2, "ukb-a.IVW.FDR.ALL.csv")
>
> # Select the traits with MR Egger and Median weight significant too.
>
> sigIVW = filter(temp2,Padj.fdr < 0.05)
> dim(sigIVW)
[1] 11 10
>
>
> sigMREgger = filter(data, method == "MR Egger")
> sigMREgger = filter(sigMREgger,pval < 0.05)
> dim(sigMREgger)
[1] 12 9
>
> sigWeightedmedian = filter(data, method == "Weighted median")
> sigWeightedmedian = filter(sigWeightedmedian,pval < 0.05)
> dim(sigWeightedmedian)
[1] 21 9
>
>
> merged = merge(sigIVW, sigMREgger, by = "id.exposure")
> dim(merged)
[1] 3 18
> merged2 = merge(merged, sigWeightedmedian, by = "id.exposure")
> dim(merged)
[1] 3 18
> toprint = merged2
> id.exposure = toprint$id.exposure
> id.exposure
[1] "ukb-a-190" "ukb-a-77"
>
> #ukb-a-190
> #ukb-a-77
>
>
> sigIVW = filter(sigIVW, id.exposure == "ukb-a-190" | id.exposure == "ukb-a-77")
> sigMREgger = filter(sigMREgger, id.exposure == "ukb-a-190" | id.exposure == "ukb-a-77")
> sigWeightedmedian = filter(sigWeightedmedian, id.exposure == "ukb-a-190" | id.exposure == "ukb-a-77")
>
> binded = rbind(sigIVW, sigMREgger, fill=TRUE)
> binded2 = rbind(binded, sigWeightedmedian, fill=TRUE)
>
>
>
> #save results with the significant traits for follow up
> write.table(binded2, "RESULTS.ukb-a.txt", quote = F, sep = ",", row.names = F)
>
>
>
> #open clean ple results file
> ple = fread("ukb-a-ALL.clean.ple.csv", header=T)
> ple = filter(ple, id.exposure == "ukb-a-190"| id.exposure == "ukb-a-77")
> head(ple)
id.exposure id.outcome outcome
1: ukb-a-190 hOPa3X outcome
2: ukb-a-77 hOPa3X outcome
exposure
1: Treatment/medication code: levothyroxine sodium || id:ukb-a-190
2: Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
egger_intercept se pval
1: -0.008787168 0.02759543 0.7516332
2: -0.002169242 0.01977369 0.9129826
> write.table(ple, "PLEITROPY.ukb-a.txt", quote = F, sep = ",", row.names = F)
>
> #open clean het results file
> het = fread("ukb-a-ALL.clean.het.csv", header=T)
> het = filter(het, id.exposure == "ukb-a-190"| id.exposure == "ukb-a-77")
> head(het)
id.exposure id.outcome outcome
1: ukb-a-190 hOPa3X outcome
2: ukb-a-190 hOPa3X outcome
3: ukb-a-77 hOPa3X outcome
4: ukb-a-77 hOPa3X outcome
exposure
1: Treatment/medication code: levothyroxine sodium || id:ukb-a-190
2: Treatment/medication code: levothyroxine sodium || id:ukb-a-190
3: Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
4: Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
method Q Q_df Q_pval
1: MR Egger 139.6972 45 1.24e-11
2: Inverse variance weighted 140.0120 46 1.99e-11
3: MR Egger 166.8063 65 6.78e-11
4: Inverse variance weighted 166.8372 66 1.09e-10
> write.table(het, "HETEROGENEITY.ukb-a.txt", quote = F, sep = ",", row.names = F)
>
>
cat: *_ukb_a.ALL.res.txt: No such file or directory cat: *_ukb_a.ALL.het.txt: No such file or directory cat: *_ukb_a.ALL.ple.txt: No such file or directory [-] Unloading gcc 9.2.0 ... [-] Unloading GSL 2.6 for GCC 9.2.0 ... [-] Unloading openmpi 3.1.4 for GCC 9.2.0 [-] Unloading ImageMagick 7.0.8 on cn1013 [-] Unloading HDF5 1.10.4 [-] Unloading NetCDF 4.7.4_gcc9.2.0 [-] Unloading pandoc 2.11.4 on cn1013 [-] Unloading pcre2 10.21 ... [-] Unloading R 4.0.3 [+] Loading gcc 9.2.0 ... [+] Loading GSL 2.6 for GCC 9.2.0 ... [-] Unloading gcc 9.2.0 ... [+] Loading gcc 9.2.0 ... [+] Loading openmpi 3.1.4 for GCC 9.2.0 [+] Loading ImageMagick 7.0.8 on cn1013 [+] Loading HDF5 1.10.4 [-] Unloading gcc 9.2.0 ... [+] Loading gcc 9.2.0 ... [+] Loading NetCDF 4.7.4_gcc9.2.0 [+] Loading pandoc 2.11.4 on cn1013 [+] Loading pcre2 10.21 ... [+] Loading R 4.0.3 The following have been reloaded with a version change: 1) R/4.0 => R/4.0.3 ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ── ✔ ggplot2 3.3.3 ✔ purrr 0.3.4 ✔ tibble 3.1.0 ✔ dplyr 1.0.5 ✔ tidyr 1.1.3 ✔ stringr 1.4.0 ✔ readr 1.4.0 ✔ forcats 0.5.1 ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::between() masks data.table::between() ✖ dplyr::filter() masks stats::filter() ✖ dplyr::first() masks data.table::first() ✖ dplyr::lag() masks stats::lag() ✖ dplyr::last() masks data.table::last() ✖ purrr::transpose() masks data.table::transpose()
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/RESULTS.ukb-a.txt",sep=",")
res
| id.exposure | id.outcome | outcome | exposure | method | nsnp | b | se | pval | Padj.fdr | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ukb-a-77 | hOPa3X | outcome | Non-cancer illness code self-reported: hypoth... | Inverse variance weighted | 67 | 9.061093 | 1.643453 | 3.520000e-08 | 0.000013 |
| 1 | ukb-a-190 | hOPa3X | outcome | Treatment/medication code: levothyroxine sodiu... | Inverse variance weighted | 47 | 10.934091 | 2.381289 | 4.400000e-06 | 0.000827 |
| 2 | ukb-a-77 | hOPa3X | outcome | Non-cancer illness code self-reported: hypoth... | MR Egger | 67 | 9.449497 | 3.908585 | 1.843526e-02 | NaN |
| 3 | ukb-a-190 | hOPa3X | outcome | Treatment/medication code: levothyroxine sodiu... | MR Egger | 47 | 12.735893 | 6.148272 | 4.407815e-02 | NaN |
| 4 | ukb-a-77 | hOPa3X | outcome | Non-cancer illness code self-reported: hypoth... | Weighted median | 67 | 8.954881 | 1.983226 | 6.320000e-06 | NaN |
| 5 | ukb-a-190 | hOPa3X | outcome | Treatment/medication code: levothyroxine sodiu... | Weighted median | 47 | 7.277228 | 2.572941 | 4.678575e-03 | NaN |
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/HETEROGENEITY.ukb-a.txt",sep=",")
res
| id.exposure | id.outcome | outcome | exposure | method | Q | Q_df | Q_pval | |
|---|---|---|---|---|---|---|---|---|
| 0 | ukb-a-190 | hOPa3X | outcome | Treatment/medication code: levothyroxine sodiu... | MR Egger | 139.697199 | 45 | 1.240000e-11 |
| 1 | ukb-a-190 | hOPa3X | outcome | Treatment/medication code: levothyroxine sodiu... | Inverse variance weighted | 140.011973 | 46 | 1.990000e-11 |
| 2 | ukb-a-77 | hOPa3X | outcome | Non-cancer illness code self-reported: hypoth... | MR Egger | 166.806280 | 65 | 6.780000e-11 |
| 3 | ukb-a-77 | hOPa3X | outcome | Non-cancer illness code self-reported: hypoth... | Inverse variance weighted | 166.837164 | 66 | 1.090000e-10 |
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/PLEITROPY.ukb-a.txt",sep=",")
res
| id.exposure | id.outcome | outcome | exposure | egger_intercept | se | pval | |
|---|---|---|---|---|---|---|---|
| 0 | ukb-a-190 | hOPa3X | outcome | Treatment/medication code: levothyroxine sodiu... | -0.008787 | 0.027595 | 0.751633 |
| 1 | ukb-a-77 | hOPa3X | outcome | Non-cancer illness code self-reported: hypoth... | -0.002169 | 0.019774 | 0.912983 |
Select the traits with IVW FDR-corrected p-value < 0.05 and Weighted median and MR Egger row pvalue < 0.05. For the selected traits, get pleitropy and heterogeneity results too. Three traits are selected for follow-up analysis:
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ebi_a.DB
cat *_ebi_a_res.txt > ebi-a-ALL.res.csv
cat *_ebi_a_het.txt > ebi-a-ALL.het.csv
cat *_ebi_a_ple.txt > ebi-a-ALL.ple.csv
#open in excel remove multiples headers
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results
module load R/4.0.3
R --vanilla --no-save
# Load libraries
library(data.table)
library(tidyverse)
library(dplyr)
#open clean ukb-a-ALL file witht the results
data = fread("ebi-a-ALL.clean.res.csv", header = T)
head(data)
#used methods
table(data$method)
exposures <- unique(data$exposure)
numberOfexposures <- length(exposures)
#how many exposure do we haver
numberOfexposures
#filter using IVW
IVW = filter(data, method == "Inverse variance weighted")
dim(IVW)
head(IVW)
Padj.fdr = p.adjust(IVW$pval, method = "fdr")
table(Padj.fdr)
temp = cbind(IVW, Padj.fdr)
head(temp)
temp2 <- temp[order(Padj.fdr),]
head(temp2)
#save FDR corrected IVW results
write.csv(temp2, "ebi-a.IVW.FDR.ALL.csv")
# Select the traits with MR Egger and Median weight significant too.
sigIVW = filter(temp2,Padj.fdr < 0.05)
dim(sigIVW)
print(sigIVW)
sigMREgger = filter(data, method == "MR Egger")
sigMREgger = filter(sigMREgger,pval < 0.05)
dim(sigMREgger)
print(sigMREgger)
sigWeightedmedian = filter(data, method == "Weighted median")
sigWeightedmedian = filter(sigWeightedmedian,pval < 0.05)
dim(sigWeightedmedian)
print(sigWeightedmedian)
merged = merge(sigIVW, sigMREgger, by = "id.exposure")
dim(merged)
print(merged)
merged2 = merge(merged, sigWeightedmedian, by = "id.exposure")
dim(merged2)
print(merged2)
toprint = merged2
print(toprint)
#id.exposure
#ebi-a-GCST002318
#ebi-a-GCST005531
#ebi-a-GCST005536
sigIVW = filter(sigIVW, id.exposure == "ebi-a-GCST002318" | id.exposure == "ebi-a-GCST005531" | id.exposure == "ebi-a-GCST005536")
sigMREgger = filter(sigMREgger, id.exposure == "ebi-a-GCST002318" | id.exposure == "ebi-a-GCST005531" | id.exposure == "ebi-a-GCST005536")
sigWeightedmedian = filter(sigWeightedmedian, id.exposure == "ebi-a-GCST002318" | id.exposure == "ebi-a-GCST005531" | id.exposure == "ebi-a-GCST005536")
binded = rbind(sigIVW, sigMREgger, fill=TRUE)
binded2 = rbind(binded, sigWeightedmedian, fill=TRUE)
#save results with the significant traits for follow up
write.table(binded2, "RESULTS.ebi-a.txt", quote = F, sep = ",", row.names = F)
#open clean ple results file
ple = fread("ebi-a-ALL.clean.ple.csv", header=T)
ple = filter(ple, id.exposure == "ebi-a-GCST002318" | id.exposure == "ebi-a-GCST005531" | id.exposure == "ebi-a-GCST005536")
head(ple)
write.table(ple, "PLEITROPY.ebi-a.txt", quote = F, sep = ",", row.names = F)
#open clean het results file
het = fread("ebi-a-ALL.clean.het.csv", header=T)
het = filter(het, id.exposure == "ebi-a-GCST002318" | id.exposure == "ebi-a-GCST005531" | id.exposure == "ebi-a-GCST005536")
head(het)
write.table(het, "HETEROGENEITY.ebi-a.txt", quote = F, sep = ",", row.names = F)
R version 4.0.3 (2020-10-10) -- "Bunny-Wunnies Freak Out"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> # Load libraries
> library(data.table)
> library(tidyverse)
> library(dplyr)
> #open clean ukb-a-ALL file witht the results
> data = fread("ebi-a-ALL.clean.res.csv", header = T)
> head(data)
id.exposure id.outcome outcome
1: ebi-a-GCST005536 n4hbHD outcome
2: ebi-a-GCST005528 n4hbHD outcome
3: ebi-a-GCST002318 n4hbHD outcome
4: ebi-a-GCST006361 n4hbHD outcome
5: ebi-a-GCST002318 n4hbHD outcome
6: ebi-a-GCST003129 n4hbHD outcome
exposure
1: Type 1 diabetes || id:ebi-a-GCST005536
2: Juvenile idiopathic arthritis (oligoarticular or rheumatoid factor-negative polyarticular) || id:ebi-a-GCST005528
3: Rheumatoid arthritis || id:ebi-a-GCST002318
4: Anti-Epstein-Barr virus nuclear antigen (EBNA) IgG levels || id:ebi-a-GCST006361
5: Rheumatoid arthritis || id:ebi-a-GCST002318
6: Primary biliary cholangitis || id:ebi-a-GCST003129
method nsnp b se pval
1: Inverse variance weighted 30 0.3003068 0.06644808 0.000006200
2: Weighted median 4 -0.1726336 0.03835287 0.000006760
3: Inverse variance weighted 46 0.3343471 0.07449680 0.000007190
4: Wald ratio 1 -0.7821369 0.17551069 0.000008340
5: Weighted median 46 0.3369727 0.08614522 0.000091700
6: Inverse variance weighted 22 0.1489806 0.03842534 0.000105689
> #used methods
> table(data$method)
Inverse variance weighted MR Egger Simple mode
192 177 177
Wald ratio Weighted median Weighted mode
39 177 177
> exposures <- unique(data$exposure)
> numberOfexposures <- length(exposures)
> #how many exposure do we haver
> numberOfexposures
[1] 231
>
> #filter using IVW
> IVW = filter(data, method == "Inverse variance weighted")
> dim(IVW)
[1] 192 9
> head(IVW)
id.exposure id.outcome outcome
1: ebi-a-GCST005536 n4hbHD outcome
2: ebi-a-GCST002318 n4hbHD outcome
3: ebi-a-GCST003129 n4hbHD outcome
4: ebi-a-GCST005531 n4hbHD outcome
5: ebi-a-GCST006095 n4hbHD outcome
6: ebi-a-GCST005314 n4hbHD outcome
exposure method
1: Type 1 diabetes || id:ebi-a-GCST005536 Inverse variance weighted
2: Rheumatoid arthritis || id:ebi-a-GCST002318 Inverse variance weighted
3: Primary biliary cholangitis || id:ebi-a-GCST003129 Inverse variance weighted
4: Multiple sclerosis || id:ebi-a-GCST005531 Inverse variance weighted
5: Excessive hairiness || id:ebi-a-GCST006095 Inverse variance weighted
6: Offspring birth weight || id:ebi-a-GCST005314 Inverse variance weighted
nsnp b se pval
1: 30 0.3003068 0.06644808 0.000006200
2: 46 0.3343471 0.07449680 0.000007190
3: 22 0.1489806 0.03842534 0.000105689
4: 43 0.1457677 0.04086487 0.000361000
5: 2 -0.5115343 0.14918993 0.000606379
6: 8 1.2003054 0.36649194 0.001056168
> Padj.fdr = p.adjust(IVW$pval, method = "fdr")
> table(Padj.fdr)
Padj.fdr
0.00069024 0.006764096 0.017328 0.0232849536
2 1 1 1
0.033797376 0.065684184 0.109265621333333 0.1722346752
1 2 1 1
0.205443630545455 0.24498552 0.502617422769231 0.508623636
1 1 1 3
0.5785398528 0.582636192 0.607350392888889 0.615309564342857
9 1 1 8
0.638995221333333 0.653102872216216 0.66175375831579 0.804246749538462
1 1 1 1
0.807548721777778 0.8399104896 0.843191108338983 0.856250165169231
15 1 4 6
0.861210578149254 0.869932708173913 0.876422288 0.878113398211765
2 2 15 1
0.883245362360656 0.886446472258064 0.896992758857143 0.90772142070229
37 2 2 5
0.910476128 0.935078184902256 0.949898679466667 0.956535646588235
1 1 2 1
0.961609688502857 0.966236965090909 0.966239632271186 0.979219159912088
39 1 1 5
0.981694665442623 0.992906875870968 0.996575792
1 3 6
> temp = cbind(IVW, Padj.fdr)
> head(temp)
id.exposure id.outcome outcome
1: ebi-a-GCST005536 n4hbHD outcome
2: ebi-a-GCST002318 n4hbHD outcome
3: ebi-a-GCST003129 n4hbHD outcome
4: ebi-a-GCST005531 n4hbHD outcome
5: ebi-a-GCST006095 n4hbHD outcome
6: ebi-a-GCST005314 n4hbHD outcome
exposure method
1: Type 1 diabetes || id:ebi-a-GCST005536 Inverse variance weighted
2: Rheumatoid arthritis || id:ebi-a-GCST002318 Inverse variance weighted
3: Primary biliary cholangitis || id:ebi-a-GCST003129 Inverse variance weighted
4: Multiple sclerosis || id:ebi-a-GCST005531 Inverse variance weighted
5: Excessive hairiness || id:ebi-a-GCST006095 Inverse variance weighted
6: Offspring birth weight || id:ebi-a-GCST005314 Inverse variance weighted
nsnp b se pval Padj.fdr
1: 30 0.3003068 0.06644808 0.000006200 0.000690240
2: 46 0.3343471 0.07449680 0.000007190 0.000690240
3: 22 0.1489806 0.03842534 0.000105689 0.006764096
4: 43 0.1457677 0.04086487 0.000361000 0.017328000
5: 2 -0.5115343 0.14918993 0.000606379 0.023284954
6: 8 1.2003054 0.36649194 0.001056168 0.033797376
> temp2 <- temp[order(Padj.fdr),]
> head(temp2)
id.exposure id.outcome outcome
1: ebi-a-GCST005536 n4hbHD outcome
2: ebi-a-GCST002318 n4hbHD outcome
3: ebi-a-GCST003129 n4hbHD outcome
4: ebi-a-GCST005531 n4hbHD outcome
5: ebi-a-GCST006095 n4hbHD outcome
6: ebi-a-GCST005314 n4hbHD outcome
exposure method
1: Type 1 diabetes || id:ebi-a-GCST005536 Inverse variance weighted
2: Rheumatoid arthritis || id:ebi-a-GCST002318 Inverse variance weighted
3: Primary biliary cholangitis || id:ebi-a-GCST003129 Inverse variance weighted
4: Multiple sclerosis || id:ebi-a-GCST005531 Inverse variance weighted
5: Excessive hairiness || id:ebi-a-GCST006095 Inverse variance weighted
6: Offspring birth weight || id:ebi-a-GCST005314 Inverse variance weighted
nsnp b se pval Padj.fdr
1: 30 0.3003068 0.06644808 0.000006200 0.000690240
2: 46 0.3343471 0.07449680 0.000007190 0.000690240
3: 22 0.1489806 0.03842534 0.000105689 0.006764096
4: 43 0.1457677 0.04086487 0.000361000 0.017328000
5: 2 -0.5115343 0.14918993 0.000606379 0.023284954
6: 8 1.2003054 0.36649194 0.001056168 0.033797376
> #save FDR corrected IVW results
> write.csv(temp2, "ebi-a.IVW.FDR.ALL.csv")
>
> # Select the traits with MR Egger and Median weight significant too.
>
> sigIVW = filter(temp2,Padj.fdr < 0.05)
> dim(sigIVW)
[1] 6 10
> print(sigIVW)
id.exposure id.outcome outcome
1: ebi-a-GCST005536 n4hbHD outcome
2: ebi-a-GCST002318 n4hbHD outcome
3: ebi-a-GCST003129 n4hbHD outcome
4: ebi-a-GCST005531 n4hbHD outcome
5: ebi-a-GCST006095 n4hbHD outcome
6: ebi-a-GCST005314 n4hbHD outcome
exposure method
1: Type 1 diabetes || id:ebi-a-GCST005536 Inverse variance weighted
2: Rheumatoid arthritis || id:ebi-a-GCST002318 Inverse variance weighted
3: Primary biliary cholangitis || id:ebi-a-GCST003129 Inverse variance weighted
4: Multiple sclerosis || id:ebi-a-GCST005531 Inverse variance weighted
5: Excessive hairiness || id:ebi-a-GCST006095 Inverse variance weighted
6: Offspring birth weight || id:ebi-a-GCST005314 Inverse variance weighted
nsnp b se pval Padj.fdr
1: 30 0.3003068 0.06644808 0.000006200 0.000690240
2: 46 0.3343471 0.07449680 0.000007190 0.000690240
3: 22 0.1489806 0.03842534 0.000105689 0.006764096
4: 43 0.1457677 0.04086487 0.000361000 0.017328000
5: 2 -0.5115343 0.14918993 0.000606379 0.023284954
6: 8 1.2003054 0.36649194 0.001056168 0.033797376
>
> sigMREgger = filter(data, method == "MR Egger")
> sigMREgger = filter(sigMREgger,pval < 0.05)
> dim(sigMREgger)
[1] 9 9
> print(sigMREgger)
id.exposure id.outcome outcome
1: ebi-a-GCST005212 n4hbHD outcome
2: ebi-a-GCST005536 n4hbHD outcome
3: ebi-a-GCST002318 n4hbHD outcome
4: ebi-a-GCST005047 n4hbHD outcome
5: ebi-a-GCST004826 n4hbHD outcome
6: ebi-a-GCST005531 n4hbHD outcome
7: ebi-a-GCST006941 n4hbHD outcome
8: ebi-a-GCST005902 n4hbHD outcome
9: ebi-a-GCST004621 n4hbHD outcome
exposure method nsnp
1: Asthma || id:ebi-a-GCST005212 MR Egger 15
2: Type 1 diabetes || id:ebi-a-GCST005536 MR Egger 30
3: Rheumatoid arthritis || id:ebi-a-GCST002318 MR Egger 46
4: Type 2 diabetes || id:ebi-a-GCST005047 MR Egger 24
5: P wave duration || id:ebi-a-GCST004826 MR Egger 8
6: Multiple sclerosis || id:ebi-a-GCST005531 MR Egger 43
7: Irritable mood || id:ebi-a-GCST006941 MR Egger 34
8: Depression (broad) || id:ebi-a-GCST005902 MR Egger 13
9: Sum basophil neutrophil counts || id:ebi-a-GCST004621 MR Egger 140
b se pval
1: -1.4250127 0.46148142 0.008645653
2: 0.3086710 0.11678593 0.013303516
3: 0.3764539 0.15007957 0.015889353
4: -0.9356519 0.38175897 0.022663644
5: 0.1104245 0.03837218 0.028141404
6: 0.1697660 0.07595469 0.030920281
7: 8.7229122 3.89522190 0.032208974
8: -23.7715314 9.94414990 0.035829432
9: -0.3490293 0.17531085 0.048464913
>
> sigWeightedmedian = filter(data, method == "Weighted median")
> sigWeightedmedian = filter(sigWeightedmedian,pval < 0.05)
> dim(sigWeightedmedian)
[1] 19 9
> print(sigWeightedmedian)
id.exposure id.outcome outcome
1: ebi-a-GCST005528 n4hbHD outcome
2: ebi-a-GCST002318 n4hbHD outcome
3: ebi-a-GCST005536 n4hbHD outcome
4: ebi-a-GCST005186 n4hbHD outcome
5: ebi-a-GCST003129 n4hbHD outcome
6: ebi-a-GCST005180 n4hbHD outcome
7: ebi-a-GCST000568 n4hbHD outcome
8: ebi-a-GCST004826 n4hbHD outcome
9: ebi-a-GCST006948 n4hbHD outcome
10: ebi-a-GCST001198 n4hbHD outcome
11: ebi-a-GCST005314 n4hbHD outcome
12: ebi-a-GCST006701 n4hbHD outcome
13: ebi-a-GCST006091 n4hbHD outcome
14: ebi-a-GCST004988 n4hbHD outcome
15: ebi-a-GCST000612 n4hbHD outcome
16: ebi-a-GCST004939 n4hbHD outcome
17: ebi-a-GCST000964 n4hbHD outcome
18: ebi-a-GCST005531 n4hbHD outcome
19: ebi-a-GCST006862 n4hbHD outcome
exposure
1: Juvenile idiopathic arthritis (oligoarticular or rheumatoid factor-negative polyarticular) || id:ebi-a-GCST005528
2: Rheumatoid arthritis || id:ebi-a-GCST002318
3: Type 1 diabetes || id:ebi-a-GCST005536
4: Fasting blood glucose || id:ebi-a-GCST005186
5: Primary biliary cholangitis || id:ebi-a-GCST003129
6: Homeostasis model assessment of beta-cell function || id:ebi-a-GCST005180
7: Fasting blood glucose || id:ebi-a-GCST000568
8: P wave duration || id:ebi-a-GCST004826
9: Feeling nervous || id:ebi-a-GCST006948
10: Multiple sclerosis || id:ebi-a-GCST001198
11: Offspring birth weight || id:ebi-a-GCST005314
12: Parental longevity (father's attained age) || id:ebi-a-GCST006701
13: Freckles || id:ebi-a-GCST006091
14: Breast cancer || id:ebi-a-GCST004988
15: Celiac disease || id:ebi-a-GCST000612
16: Glycated hemoglobin levels || id:ebi-a-GCST004939
17: Ulcerative colitis || id:ebi-a-GCST000964
18: Multiple sclerosis || id:ebi-a-GCST005531
19: Asthma || id:ebi-a-GCST006862
method nsnp b se pval
1: Weighted median 4 -0.17263356 0.03835287 0.000006760
2: Weighted median 46 0.33697273 0.08614522 0.000091700
3: Weighted median 30 0.18744401 0.05384911 0.000499710
4: Weighted median 21 -1.14437930 0.38742995 0.003139170
5: Weighted median 22 0.13687382 0.04645854 0.003217562
6: Weighted median 3 2.16458744 0.74723960 0.003770161
7: Weighted median 13 -1.06730005 0.38276356 0.005296818
8: Weighted median 8 0.06778530 0.02650992 0.010558574
9: Weighted median 29 1.91718703 0.76257218 0.011933467
10: Weighted median 23 0.16800075 0.06888708 0.014736745
11: Weighted median 8 1.24087794 0.52296186 0.017654366
12: Weighted median 14 2.08387519 0.88331634 0.018316881
13: Weighted median 6 0.18091142 0.07976635 0.023328188
14: Weighted median 126 0.18540287 0.08400453 0.027310007
15: Weighted median 12 0.18072220 0.08284919 0.029158320
16: Weighted median 3 -0.81266431 0.38436112 0.034487522
17: Weighted median 81 -0.08588639 0.04191713 0.040466335
18: Weighted median 43 0.12735750 0.06310119 0.043559411
19: Weighted median 15 -0.27930089 0.13948435 0.045243716
>
> merged = merge(sigIVW, sigMREgger, by = "id.exposure")
> dim(merged)
[1] 3 18
> print(merged)
id.exposure id.outcome.x outcome.x
1: ebi-a-GCST002318 n4hbHD outcome
2: ebi-a-GCST005531 n4hbHD outcome
3: ebi-a-GCST005536 n4hbHD outcome
exposure.x method.x nsnp.x
1: Rheumatoid arthritis || id:ebi-a-GCST002318 Inverse variance weighted 46
2: Multiple sclerosis || id:ebi-a-GCST005531 Inverse variance weighted 43
3: Type 1 diabetes || id:ebi-a-GCST005536 Inverse variance weighted 30
b.x se.x pval.x Padj.fdr id.outcome.y outcome.y
1: 0.3343471 0.07449680 7.19e-06 0.00069024 n4hbHD outcome
2: 0.1457677 0.04086487 3.61e-04 0.01732800 n4hbHD outcome
3: 0.3003068 0.06644808 6.20e-06 0.00069024 n4hbHD outcome
exposure.y method.y nsnp.y b.y
1: Rheumatoid arthritis || id:ebi-a-GCST002318 MR Egger 46 0.3764539
2: Multiple sclerosis || id:ebi-a-GCST005531 MR Egger 43 0.1697660
3: Type 1 diabetes || id:ebi-a-GCST005536 MR Egger 30 0.3086710
se.y pval.y
1: 0.15007957 0.01588935
2: 0.07595469 0.03092028
3: 0.11678593 0.01330352
> merged2 = merge(merged, sigWeightedmedian, by = "id.exposure")
> dim(merged2)
[1] 3 26
> print(merged2)
id.exposure id.outcome.x outcome.x
1: ebi-a-GCST002318 n4hbHD outcome
2: ebi-a-GCST005531 n4hbHD outcome
3: ebi-a-GCST005536 n4hbHD outcome
exposure.x method.x nsnp.x
1: Rheumatoid arthritis || id:ebi-a-GCST002318 Inverse variance weighted 46
2: Multiple sclerosis || id:ebi-a-GCST005531 Inverse variance weighted 43
3: Type 1 diabetes || id:ebi-a-GCST005536 Inverse variance weighted 30
b.x se.x pval.x Padj.fdr id.outcome.y outcome.y
1: 0.3343471 0.07449680 7.19e-06 0.00069024 n4hbHD outcome
2: 0.1457677 0.04086487 3.61e-04 0.01732800 n4hbHD outcome
3: 0.3003068 0.06644808 6.20e-06 0.00069024 n4hbHD outcome
exposure.y method.y nsnp.y b.y
1: Rheumatoid arthritis || id:ebi-a-GCST002318 MR Egger 46 0.3764539
2: Multiple sclerosis || id:ebi-a-GCST005531 MR Egger 43 0.1697660
3: Type 1 diabetes || id:ebi-a-GCST005536 MR Egger 30 0.3086710
se.y pval.y id.outcome outcome
1: 0.15007957 0.01588935 n4hbHD outcome
2: 0.07595469 0.03092028 n4hbHD outcome
3: 0.11678593 0.01330352 n4hbHD outcome
exposure method nsnp b
1: Rheumatoid arthritis || id:ebi-a-GCST002318 Weighted median 46 0.3369727
2: Multiple sclerosis || id:ebi-a-GCST005531 Weighted median 43 0.1273575
3: Type 1 diabetes || id:ebi-a-GCST005536 Weighted median 30 0.1874440
se pval
1: 0.08614522 0.00009170
2: 0.06310119 0.04355941
3: 0.05384911 0.00049971
> toprint = merged2
> print(toprint)
id.exposure id.outcome.x outcome.x
1: ebi-a-GCST002318 n4hbHD outcome
2: ebi-a-GCST005531 n4hbHD outcome
3: ebi-a-GCST005536 n4hbHD outcome
exposure.x method.x nsnp.x
1: Rheumatoid arthritis || id:ebi-a-GCST002318 Inverse variance weighted 46
2: Multiple sclerosis || id:ebi-a-GCST005531 Inverse variance weighted 43
3: Type 1 diabetes || id:ebi-a-GCST005536 Inverse variance weighted 30
b.x se.x pval.x Padj.fdr id.outcome.y outcome.y
1: 0.3343471 0.07449680 7.19e-06 0.00069024 n4hbHD outcome
2: 0.1457677 0.04086487 3.61e-04 0.01732800 n4hbHD outcome
3: 0.3003068 0.06644808 6.20e-06 0.00069024 n4hbHD outcome
exposure.y method.y nsnp.y b.y
1: Rheumatoid arthritis || id:ebi-a-GCST002318 MR Egger 46 0.3764539
2: Multiple sclerosis || id:ebi-a-GCST005531 MR Egger 43 0.1697660
3: Type 1 diabetes || id:ebi-a-GCST005536 MR Egger 30 0.3086710
se.y pval.y id.outcome outcome
1: 0.15007957 0.01588935 n4hbHD outcome
2: 0.07595469 0.03092028 n4hbHD outcome
3: 0.11678593 0.01330352 n4hbHD outcome
exposure method nsnp b
1: Rheumatoid arthritis || id:ebi-a-GCST002318 Weighted median 46 0.3369727
2: Multiple sclerosis || id:ebi-a-GCST005531 Weighted median 43 0.1273575
3: Type 1 diabetes || id:ebi-a-GCST005536 Weighted median 30 0.1874440
se pval
1: 0.08614522 0.00009170
2: 0.06310119 0.04355941
3: 0.05384911 0.00049971
> #id.exposure
> #ebi-a-GCST002318
> #ebi-a-GCST005531
> #ebi-a-GCST005536
>
> sigIVW = filter(sigIVW, id.exposure == "ebi-a-GCST002318" | id.exposure == "ebi-a-GCST005531" | id.exposure == "ebi-a-GCST005536")
> sigMREgger = filter(sigMREgger, id.exposure == "ebi-a-GCST002318" | id.exposure == "ebi-a-GCST005531" | id.exposure == "ebi-a-GCST005536")
> sigWeightedmedian = filter(sigWeightedmedian, id.exposure == "ebi-a-GCST002318" | id.exposure == "ebi-a-GCST005531" | id.exposure == "ebi-a-GCST005536")
>
> binded = rbind(sigIVW, sigMREgger, fill=TRUE)
> binded2 = rbind(binded, sigWeightedmedian, fill=TRUE)
>
>
>
> #save results with the significant traits for follow up
> write.table(binded2, "RESULTS.ebi-a.txt", quote = F, sep = ",", row.names = F)
>
> #open clean ple results file
> ple = fread("ebi-a-ALL.clean.ple.csv", header=T)
> ple = filter(ple, id.exposure == "ebi-a-GCST002318" | id.exposure == "ebi-a-GCST005531" | id.exposure == "ebi-a-GCST005536")
> head(ple)
id.exposure id.outcome outcome
1: ebi-a-GCST002318 n4hbHD outcome
2: ebi-a-GCST005531 n4hbHD outcome
3: ebi-a-GCST005536 n4hbHD outcome
exposure egger_intercept se
1: Rheumatoid arthritis || id:ebi-a-GCST002318 -0.006956471 0.02145285
2: Multiple sclerosis || id:ebi-a-GCST005531 -0.005026082 0.01335196
3: Type 1 diabetes || id:ebi-a-GCST005536 -0.002250609 0.02562195
pval
1: 0.7472718
2: 0.7085380
3: 0.9306298
> write.table(ple, "PLEITROPY.ebi-a.txt", quote = F, sep = ",", row.names = F)
>
> #open clean het results file
> het = fread("ebi-a-ALL.clean.het.csv", header=T)
> het = filter(het, id.exposure == "ebi-a-GCST002318" | id.exposure == "ebi-a-GCST005531" | id.exposure == "ebi-a-GCST005536")
> head(het)
id.exposure id.outcome outcome
1: ebi-a-GCST002318 n4hbHD outcome
2: ebi-a-GCST002318 n4hbHD outcome
3: ebi-a-GCST005531 n4hbHD outcome
4: ebi-a-GCST005531 n4hbHD outcome
5: ebi-a-GCST005536 n4hbHD outcome
6: ebi-a-GCST005536 n4hbHD outcome
exposure method
1: Rheumatoid arthritis || id:ebi-a-GCST002318 MR Egger
2: Rheumatoid arthritis || id:ebi-a-GCST002318 Inverse variance weighted
3: Multiple sclerosis || id:ebi-a-GCST005531 MR Egger
4: Multiple sclerosis || id:ebi-a-GCST005531 Inverse variance weighted
5: Type 1 diabetes || id:ebi-a-GCST005536 MR Egger
6: Type 1 diabetes || id:ebi-a-GCST005536 Inverse variance weighted
Q Q_df Q_pval V9
1: 129.7948925 44 2.130000e-10 NA
2: 130.1050718 45 3.360000e-10 NA
3: 49.27712243 41 1.758103e-01 NA
4: 49.44742883 42 2.002339e-01 NA
5: 99.55347684 28 5.980000e-10 NA
6: 99.58090989 29 1.140000e-09 NA
> write.table(het, "HETEROGENEITY.ebi-a.txt", quote = F, sep = ",", row.names = F)
>
>
>
[-] Unloading gcc 9.2.0 ... [-] Unloading GSL 2.6 for GCC 9.2.0 ... [-] Unloading openmpi 3.1.4 for GCC 9.2.0 [-] Unloading ImageMagick 7.0.8 on cn1013 [-] Unloading HDF5 1.10.4 [-] Unloading NetCDF 4.7.4_gcc9.2.0 [-] Unloading pandoc 2.11.4 on cn1013 [-] Unloading pcre2 10.21 ... [-] Unloading R 4.0.3 [+] Loading gcc 9.2.0 ... [+] Loading GSL 2.6 for GCC 9.2.0 ... [-] Unloading gcc 9.2.0 ... [+] Loading gcc 9.2.0 ... [+] Loading openmpi 3.1.4 for GCC 9.2.0 [+] Loading ImageMagick 7.0.8 on cn1013 [+] Loading HDF5 1.10.4 [-] Unloading gcc 9.2.0 ... [+] Loading gcc 9.2.0 ... [+] Loading NetCDF 4.7.4_gcc9.2.0 [+] Loading pandoc 2.11.4 on cn1013 [+] Loading pcre2 10.21 ... [+] Loading R 4.0.3 The following have been reloaded with a version change: 1) R/4.0 => R/4.0.3 ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ── ✔ ggplot2 3.3.3 ✔ purrr 0.3.4 ✔ tibble 3.1.0 ✔ dplyr 1.0.5 ✔ tidyr 1.1.3 ✔ stringr 1.4.0 ✔ readr 1.4.0 ✔ forcats 0.5.1 ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::between() masks data.table::between() ✖ dplyr::filter() masks stats::filter() ✖ dplyr::first() masks data.table::first() ✖ dplyr::lag() masks stats::lag() ✖ dplyr::last() masks data.table::last() ✖ purrr::transpose() masks data.table::transpose()
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/RESULTS.ebi-a.txt",sep=",")
res
| id.exposure | id.outcome | outcome | exposure | method | nsnp | b | se | pval | Padj.fdr | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ebi-a-GCST005536 | n4hbHD | outcome | Type 1 diabetes || id:ebi-a-GCST005536 | Inverse variance weighted | 30 | 0.300307 | 0.066448 | 0.000006 | 0.000690 |
| 1 | ebi-a-GCST002318 | n4hbHD | outcome | Rheumatoid arthritis || id:ebi-a-GCST002318 | Inverse variance weighted | 46 | 0.334347 | 0.074497 | 0.000007 | 0.000690 |
| 2 | ebi-a-GCST005531 | n4hbHD | outcome | Multiple sclerosis || id:ebi-a-GCST005531 | Inverse variance weighted | 43 | 0.145768 | 0.040865 | 0.000361 | 0.017328 |
| 3 | ebi-a-GCST005536 | n4hbHD | outcome | Type 1 diabetes || id:ebi-a-GCST005536 | MR Egger | 30 | 0.308671 | 0.116786 | 0.013304 | NaN |
| 4 | ebi-a-GCST002318 | n4hbHD | outcome | Rheumatoid arthritis || id:ebi-a-GCST002318 | MR Egger | 46 | 0.376454 | 0.150080 | 0.015889 | NaN |
| 5 | ebi-a-GCST005531 | n4hbHD | outcome | Multiple sclerosis || id:ebi-a-GCST005531 | MR Egger | 43 | 0.169766 | 0.075955 | 0.030920 | NaN |
| 6 | ebi-a-GCST002318 | n4hbHD | outcome | Rheumatoid arthritis || id:ebi-a-GCST002318 | Weighted median | 46 | 0.336973 | 0.086145 | 0.000092 | NaN |
| 7 | ebi-a-GCST005536 | n4hbHD | outcome | Type 1 diabetes || id:ebi-a-GCST005536 | Weighted median | 30 | 0.187444 | 0.053849 | 0.000500 | NaN |
| 8 | ebi-a-GCST005531 | n4hbHD | outcome | Multiple sclerosis || id:ebi-a-GCST005531 | Weighted median | 43 | 0.127357 | 0.063101 | 0.043559 | NaN |
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/HETEROGENEITY.ebi-a.txt",sep=",")
res
| id.exposure | id.outcome | outcome | exposure | method | Q | Q_df | Q_pval | V9 | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | ebi-a-GCST002318 | n4hbHD | outcome | Rheumatoid arthritis || id:ebi-a-GCST002318 | MR Egger | 129.794893 | 44 | 2.130000e-10 | NaN |
| 1 | ebi-a-GCST002318 | n4hbHD | outcome | Rheumatoid arthritis || id:ebi-a-GCST002318 | Inverse variance weighted | 130.105072 | 45 | 3.360000e-10 | NaN |
| 2 | ebi-a-GCST005531 | n4hbHD | outcome | Multiple sclerosis || id:ebi-a-GCST005531 | MR Egger | 49.277122 | 41 | 1.758103e-01 | NaN |
| 3 | ebi-a-GCST005531 | n4hbHD | outcome | Multiple sclerosis || id:ebi-a-GCST005531 | Inverse variance weighted | 49.447429 | 42 | 2.002339e-01 | NaN |
| 4 | ebi-a-GCST005536 | n4hbHD | outcome | Type 1 diabetes || id:ebi-a-GCST005536 | MR Egger | 99.553477 | 28 | 5.980000e-10 | NaN |
| 5 | ebi-a-GCST005536 | n4hbHD | outcome | Type 1 diabetes || id:ebi-a-GCST005536 | Inverse variance weighted | 99.580910 | 29 | 1.140000e-09 | NaN |
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/PLEITROPY.ebi-a.txt",sep=",")
res
| id.exposure | id.outcome | outcome | exposure | egger_intercept | se | pval | |
|---|---|---|---|---|---|---|---|
| 0 | ebi-a-GCST002318 | n4hbHD | outcome | Rheumatoid arthritis || id:ebi-a-GCST002318 | -0.006956 | 0.021453 | 0.747272 |
| 1 | ebi-a-GCST005531 | n4hbHD | outcome | Multiple sclerosis || id:ebi-a-GCST005531 | -0.005026 | 0.013352 | 0.708538 |
| 2 | ebi-a-GCST005536 | n4hbHD | outcome | Type 1 diabetes || id:ebi-a-GCST005536 | -0.002251 | 0.025622 | 0.930630 |
Select the traits with IVW FDR-corrected p-value < 0.05 and Weighted median and MR Egger row pvalue < 0.05. For the selected traits, get pleitropy and heterogeneity results too. NO traits were selected
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results/ieu-a.DB
cat *_ieu_a_res.txt > ieu-a-ALL.res.csv
cat *_ieu_a_het.txt > ieu-a-ALL.het.csv
cat *_ieu_a_ple.txt > ieu-a-ALL.ple.csv
#open in excel remove multiples headers
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results
# ieu-a-ALL.clean.res.csv
# ieu-a-ALL.clean.het.csv
# ieu-a-ALL.clean.ple.csv
module load R/4.0.3
R --vanilla --no-save
# Load libraries
library(data.table)
library(tidyverse)
library(dplyr)
#open clean ukb-a-ALL file witht the results
data = fread("ieu-a-ALL.clean.res.csv", header = T)
head(data)
#used methods
table(data$method)
exposures <- unique(data$exposure)
numberOfexposures <- length(exposures)
#how many exposure do we haver
numberOfexposures
#filter using IVW
IVW = filter(data, method == "Inverse variance weighted")
dim(IVW)
head(IVW)
Padj.fdr = p.adjust(IVW$pval, method = "fdr")
table(Padj.fdr)
temp = cbind(IVW, Padj.fdr)
head(temp)
temp2 <- temp[order(Padj.fdr),]
head(temp2)
#save FDR corrected IVW results
write.csv(temp2, "ieu-a.IVW.FDR.ALL.csv")
# Select the traits with MR Egger and Median weight significant too.
sigIVW = filter(temp2,Padj.fdr < 0.05)
dim(sigIVW)
print(sigIVW)
sigMREgger = filter(data, method == "MR Egger")
sigMREgger = filter(sigMREgger,pval < 0.05)
dim(sigMREgger)
print(sigMREgger)
sigWeightedmedian = filter(data, method == "Weighted median")
sigWeightedmedian = filter(sigWeightedmedian,pval < 0.05)
dim(sigWeightedmedian)
print(sigWeightedmedian)
merged = merge(sigIVW, sigMREgger, by = "id.exposure")
dim(merged)
print(merged)
merged2 = merge(merged, sigWeightedmedian, by = "id.exposure")
dim(merged2)
print(merged2)
toprint = merged2
print(toprint)
# nothing significant following the stablished treshold
R version 4.0.3 (2020-10-10) -- "Bunny-Wunnies Freak Out"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> # Load libraries
> library(data.table)
> library(tidyverse)
> library(dplyr)
> #open clean ukb-a-ALL file witht the results
> data = fread("ieu-a-ALL.clean.res.csv", header = T)
> head(data)
id.exposure id.outcome outcome exposure
1: ieu-a-1000 p6vvAY outcome Depressive symptoms || id:ieu-a-1000
2: ieu-a-1001 p6vvAY outcome Years of schooling || id:ieu-a-1001
3: ieu-a-1001 p6vvAY outcome Years of schooling || id:ieu-a-1001
4: ieu-a-1001 p6vvAY outcome Years of schooling || id:ieu-a-1001
5: ieu-a-1001 p6vvAY outcome Years of schooling || id:ieu-a-1001
6: ieu-a-1001 p6vvAY outcome Years of schooling || id:ieu-a-1001
method nsnp b se pval
1: Wald ratio 1 -1.0483871 1.5354839 0.49475022
2: MR Egger 68 -1.6176163 1.6801005 0.33915767
3: Weighted median 68 -0.8741709 0.4053181 0.03102495
4: Inverse variance weighted 68 -0.4048829 0.3266961 0.21522486
5: Simple mode 68 -1.0943937 0.9597497 0.25822510
6: Weighted mode 68 -1.2146839 0.9244726 0.19335389
> #used methods
> table(data$method)
Inverse variance weighted MR Egger
1 139 122
Simple mode Wald ratio Weighted median
122 39 122
Weighted mode
122
> exposures <- unique(data$exposure)
> numberOfexposures <- length(exposures)
> #how many exposure do we haver
> numberOfexposures
[1] 179
>
> #filter using IVW
> IVW = filter(data, method == "Inverse variance weighted")
> dim(IVW)
[1] 139 9
> head(IVW)
id.exposure id.outcome outcome exposure
1: ieu-a-1001 p6vvAY outcome Years of schooling || id:ieu-a-1001
2: ieu-a-1004 p6vvAY outcome Age at menopause || id:ieu-a-1004
3: ieu-a-1005 p6vvAY outcome Percent emphysema || id:ieu-a-1005
4: ieu-a-1006 p6vvAY outcome Mean platelet volume || id:ieu-a-1006
5: ieu-a-1007 p6vvAY outcome Neuroticism || id:ieu-a-1007
6: ieu-a-1008 p6vvAY outcome Platelet count || id:ieu-a-1008
method nsnp b se pval
1: Inverse variance weighted 68 -0.404882873 0.326696069 0.21522486
2: Inverse variance weighted 36 -0.012497805 0.047712107 0.79336616
3: Inverse variance weighted 2 -1.049613083 0.521934235 0.04432478
4: Inverse variance weighted 23 0.230402159 0.764784477 0.76321306
5: Inverse variance weighted 10 0.670482131 0.564269475 0.23474266
6: Inverse variance weighted 28 0.001552017 0.002534372 0.54028153
> Padj.fdr = p.adjust(IVW$pval, method = "fdr")
> table(Padj.fdr)
Padj.fdr
0.23515603 0.244094703 0.334051320363636 0.440081764142857
3 1 7 3
0.4528405662 0.487997221125 0.533173755235294 0.636135927866667
1 1 1 13
0.810221221511628 0.822491280181818 0.845878235071429 0.880389251590164
13 12 1 5
0.911330829548387 0.955656668656489 0.96253903662406 0.973848121
1 69 2 6
> temp = cbind(IVW, Padj.fdr)
> head(temp)
id.exposure id.outcome outcome exposure
1: ieu-a-1001 p6vvAY outcome Years of schooling || id:ieu-a-1001
2: ieu-a-1004 p6vvAY outcome Age at menopause || id:ieu-a-1004
3: ieu-a-1005 p6vvAY outcome Percent emphysema || id:ieu-a-1005
4: ieu-a-1006 p6vvAY outcome Mean platelet volume || id:ieu-a-1006
5: ieu-a-1007 p6vvAY outcome Neuroticism || id:ieu-a-1007
6: ieu-a-1008 p6vvAY outcome Platelet count || id:ieu-a-1008
method nsnp b se pval Padj.fdr
1: Inverse variance weighted 68 -0.404882873 0.326696069 0.21522486 0.8102212
2: Inverse variance weighted 36 -0.012497805 0.047712107 0.79336616 0.9556567
3: Inverse variance weighted 2 -1.049613083 0.521934235 0.04432478 0.4400818
4: Inverse variance weighted 23 0.230402159 0.764784477 0.76321306 0.9556567
5: Inverse variance weighted 10 0.670482131 0.564269475 0.23474266 0.8102212
6: Inverse variance weighted 28 0.001552017 0.002534372 0.54028153 0.9556567
> temp2 <- temp[order(Padj.fdr),]
> head(temp2)
id.exposure id.outcome outcome
1: ieu-a-1060 p6vvAY outcome
2: ieu-a-1132 p6vvAY outcome
3: ieu-a-1239 p6vvAY outcome
4: ieu-a-1024 p6vvAY outcome
5: ieu-a-1010 p6vvAY outcome
6: ieu-a-1025 p6vvAY outcome
exposure method
1: Celiac disease || id:ieu-a-1060 Inverse variance weighted
2: ER+ Breast cancer (Oncoarray) || id:ieu-a-1132 Inverse variance weighted
3: Years of schooling || id:ieu-a-1239 Inverse variance weighted
4: Multiple sclerosis || id:ieu-a-1024 Inverse variance weighted
5: Years of schooling || id:ieu-a-1010 Inverse variance weighted
6: Multiple sclerosis || id:ieu-a-1025 Inverse variance weighted
nsnp b se pval Padj.fdr
1: 6 0.2194779 0.07630510 0.004023417 0.2351560
2: 52 0.1562151 0.05574702 0.005075310 0.2351560
3: 285 -0.5969171 0.19282937 0.001964339 0.2351560
4: 25 0.1624268 0.06025427 0.007024308 0.2440947
5: 14 -1.0169147 0.43363713 0.019022884 0.3340513
6: 46 0.1019439 0.04513746 0.023913180 0.3340513
> #save FDR corrected IVW results
> write.csv(temp2, "ieu-a.IVW.FDR.ALL.csv")
>
> # Select the traits with MR Egger and Median weight significant too.
>
> sigIVW = filter(temp2,Padj.fdr < 0.05)
> dim(sigIVW)
[1] 0 10
> print(sigIVW)
Empty data.table (0 rows and 10 cols): id.exposure,id.outcome,outcome,exposure,method,nsnp...
>
> sigMREgger = filter(data, method == "MR Egger")
> sigMREgger = filter(sigMREgger,pval < 0.05)
> dim(sigMREgger)
[1] 4 9
> print(sigMREgger)
id.exposure id.outcome outcome exposure
1: ieu-a-1166 p6vvAY outcome ER- Breast cancer (GWAS) || id:ieu-a-1166
2: ieu-a-23 p6vvAY outcome Type 2 diabetes || id:ieu-a-23
3: ieu-a-66 p6vvAY outcome Waist circumference || id:ieu-a-66
4: ieu-a-67 p6vvAY outcome Waist circumference || id:ieu-a-67
method nsnp b se pval
1: MR Egger 7 2.1231842 0.7971919 0.04470490
2: MR Egger 25 -0.9498167 0.3779994 0.01943928
3: MR Egger 60 2.3223055 0.9968828 0.02333217
4: MR Egger 58 1.9868647 0.9913356 0.04989076
>
> sigWeightedmedian = filter(data, method == "Weighted median")
> sigWeightedmedian = filter(sigWeightedmedian,pval < 0.05)
> dim(sigWeightedmedian)
[1] 13 9
> print(sigWeightedmedian)
id.exposure id.outcome outcome
1: ieu-a-1001 p6vvAY outcome
2: ieu-a-1024 p6vvAY outcome
3: ieu-a-1059 p6vvAY outcome
4: ieu-a-1060 p6vvAY outcome
5: ieu-a-1126 p6vvAY outcome
6: ieu-a-1127 p6vvAY outcome
7: ieu-a-1129 p6vvAY outcome
8: ieu-a-1132 p6vvAY outcome
9: ieu-a-1233 p6vvAY outcome
10: ieu-a-1239 p6vvAY outcome
11: ieu-a-276 p6vvAY outcome
12: ieu-a-64 p6vvAY outcome
13: ieu-a-66 p6vvAY outcome
exposure
1: Years of schooling || id:ieu-a-1001
2: Multiple sclerosis || id:ieu-a-1024
3: Celiac disease || id:ieu-a-1059
4: Celiac disease || id:ieu-a-1060
5: Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) || id:ieu-a-1126
6: ER+ Breast cancer (Combined Oncoarray; iCOGS; GWAS meta analysis) || id:ieu-a-1127
7: Breast cancer (Oncoarray) || id:ieu-a-1129
8: ER+ Breast cancer (Oncoarray) || id:ieu-a-1132
9: Low malignant potential ovarian cancer || id:ieu-a-1233
10: Years of schooling || id:ieu-a-1239
11: Celiac disease || id:ieu-a-276
12: Waist circumference || id:ieu-a-64
13: Waist circumference || id:ieu-a-66
method nsnp b se pval
1: Weighted median 68 -0.8741709 0.40531813 0.031024952
2: Weighted median 25 0.1488261 0.06920375 0.031511807
3: Weighted median 11 0.2022635 0.08506003 0.017411966
4: Weighted median 6 0.2718987 0.09303346 0.003471327
5: Weighted median 118 0.2165049 0.09584901 0.023895072
6: Weighted median 86 0.1970291 0.09123015 0.030796460
7: Weighted median 55 0.2245652 0.09329588 0.016083227
8: Weighted median 52 0.2059731 0.08743332 0.018484157
9: Weighted median 4 0.2810805 0.13094233 0.031825328
10: Weighted median 285 -0.6517569 0.27729652 0.018753406
11: Weighted median 12 0.1807115 0.08373889 0.030925116
12: Weighted median 16 1.1921918 0.38175267 0.001790525
13: Weighted median 60 0.6620221 0.31615740 0.036263210
>
> merged = merge(sigIVW, sigMREgger, by = "id.exposure")
> dim(merged)
[1] 0 18
> print(merged)
Empty data.table (0 rows and 18 cols): id.exposure,id.outcome.x,outcome.x,exposure.x,method.x,nsnp.x...
> merged2 = merge(merged, sigWeightedmedian, by = "id.exposure")
> dim(merged2)
[1] 0 26
> print(merged2)
Empty data.table (0 rows and 26 cols): id.exposure,id.outcome.x,outcome.x,exposure.x,method.x,nsnp.x...
> toprint = merged2
> print(toprint)
Empty data.table (0 rows and 26 cols): id.exposure,id.outcome.x,outcome.x,exposure.x,method.x,nsnp.x...
> # nothing significant following the stablished treshold
>
[-] Unloading gcc 9.2.0 ... [-] Unloading GSL 2.6 for GCC 9.2.0 ... [-] Unloading openmpi 3.1.4 for GCC 9.2.0 [-] Unloading ImageMagick 7.0.8 on cn1013 [-] Unloading HDF5 1.10.4 [-] Unloading NetCDF 4.7.4_gcc9.2.0 [-] Unloading pandoc 2.11.4 on cn1013 [-] Unloading pcre2 10.21 ... [-] Unloading R 4.0.3 [+] Loading gcc 9.2.0 ... [+] Loading GSL 2.6 for GCC 9.2.0 ... [-] Unloading gcc 9.2.0 ... [+] Loading gcc 9.2.0 ... [+] Loading openmpi 3.1.4 for GCC 9.2.0 [+] Loading ImageMagick 7.0.8 on cn1013 [+] Loading HDF5 1.10.4 [-] Unloading gcc 9.2.0 ... [+] Loading gcc 9.2.0 ... [+] Loading NetCDF 4.7.4_gcc9.2.0 [+] Loading pandoc 2.11.4 on cn1013 [+] Loading pcre2 10.21 ... [+] Loading R 4.0.3 The following have been reloaded with a version change: 1) R/4.0 => R/4.0.3 ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ── ✔ ggplot2 3.3.3 ✔ purrr 0.3.4 ✔ tibble 3.1.0 ✔ dplyr 1.0.5 ✔ tidyr 1.1.3 ✔ stringr 1.4.0 ✔ readr 1.4.0 ✔ forcats 0.5.1 ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::between() masks data.table::between() ✖ dplyr::filter() masks stats::filter() ✖ dplyr::first() masks data.table::first() ✖ dplyr::lag() masks stats::lag() ✖ dplyr::last() masks data.table::last() ✖ purrr::transpose() masks data.table::transpose()
Plots and radial MR with each of the selected traits
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data
echo "id
ebi-a-GCST002318
ebi-a-GCST005531
ebi-a-GCST005536
ukb-a-190
ukb-a-77" > SignificantExposures.txt
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/
#mkdir Plots
Module load R/4.0.3
R --vanilla --no-save
#load packages
library(data.table)
library(TwoSampleMR)
library(MRInstruments)
library(RadialMR)
library(ieugwasr)
library(ggplot2)
library(gplots)
library(ggpubr)
library(cowplot)
library(svglite)
MGtemp = fread("/data/NDRS_LNG/MyastheniaGravis/updated.April2020/MetaAnalysis.hg38.ByCohort.rerun/USmerged.Itals.hg38/META_MG.USmerged.Itals.UNRELATED.hg38.Rsq03.glm_filteredDirection.HetISq80MAF001cases.rsid.txt", header = T)
head(MGtemp)
MGtemp$SNP = MGtemp$rsID
MGtemp$Freq1 = MGtemp$maf_EA.CASE
MGtemp$effect_allele = MGtemp$EffectAllele
MGtemp$other_allele = MGtemp$OtherAllele
MGtemp$Beta = MGtemp$beta
MGtemp$P.value = MGtemp$P
Out_data = format_data(MGtemp,type="outcome", beta_col = "Beta", se_col = "StdErr", eaf_col = "Freq1", pval_col = "P.value")
#the token will be different each time, it needs to be prepare at the moment
token <- "ya29.a0AfH6SMD0mAzH9QTlCbaSvicyV5-9AmVSyMEvzIlN5KcgZB3Q867FlPRT90EwutMt33pxRvHsehVXWvvJ7xHKZQBp5kj7zDtZoeAg0xspeBOhVhQVXPaYyrdi6Yx8LMI-mZuEPzRIlq3xCRzn6dvuZl1LEVaYgA"
#create a list with the GWAS id, this can be obtained from the ieu OpenGwas Project (https://gwas.mrcieu.ac.uk)
listOfGwasIds <- read.table("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data/SignificantExposures.txt", header = T)
# ebi-a-GCST002318
for(i in 1:1)
{
instrumentId <- as.character(listOfGwasIds$id[i])
tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
print(tag)
flagged <- "nope"
Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
r2 = 0.001, kb = 10000, access_token = token,
force_server = TRUE)
skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
if(skip == 0)
{
dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
res <-mr(dat)
print(res)
het <- mr_heterogeneity(dat)
print(het)
ple <- mr_pleiotropy_test(dat)
print(ple)
#write.table(res, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.1res.txt",sep = ""), quote = F, sep = ",")
#write.table(het, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.2het.txt",sep = ""), quote = F, sep = ",")
#write.table(ple, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.3ple.txt",sep = ""), quote = F, sep = ",")
res_single <- mr_singlesnp(dat)
p2 <- mr_forest_plot(res_single)
#p2
#write.table(res_single, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.4res_single.txt",sep = ""), quote = F, sep = ",")
}
else
{
print("FAIL")
}
}
# Plots
#res <- mr(dat) # to show only a few of them
mr_scatter_plot(res, dat)
ggsave("Plots/MG.ebi-a-GCST002318.Scatter_plot.jpeg", width=7, height=7)
mr_funnel_plot(res_single)
ggsave("Plots/MG.ebi-a-GCST002318.Funnel_plot.jpeg", width=7, height=7)
res_single <- mr_singlesnp(dat)
mr_forest_plot(res_single)
ggsave("Plots/MG.ebi-a-GCST002318.Forest_plot.singleSNP.jpeg", width=7, height=7)
res_loo <- mr_leaveoneout(dat)
write.table(res_loo, file = paste("Results/",instrumentId,"_MG.ebi-a-GCST002318.LOO.txt",sep = ""), quote = F, sep = ",")
mr_forest_plot(res_loo)
ggsave("Plots/MG.ebi-a-GCST002318.Forest_plot.LOO.jpeg", width=7, height=7)
#RADIAL analysis to detect and remove outliers
r_input <- format_radial(BXG = dat$beta.exposure, BYG = dat$beta.outcome, seBXG = dat$se.exposure,seBYG =dat$se.outcome, RSID = dat$SNP)
ivw <- ivw_radial(r_input,0.05,2)
ivw.radial = as.data.table(ivw$coef)
coef = c("Effect_2nd", "Iterative", "Exact_FE", "Exact_RE")
ivw.radial = cbind(coef, ivw.radial)
outliers = as.data.table(ivw$outliers)
data = as.data.table(ivw$data)
write.csv(ivw.radial, "Results/MG.ebi-a-GCST002318.radial.ivw.csv", row.names = F)
write.csv(outliers, "Results/MG.ebi-a-GCST002318.radial.ivwr.outliers.csv", row.names = F)
write.csv(data, "Results/MG.ebi-a-GCST002318.radial.ivw.allsnps.csv", row.names = F)
egger <- egger_radial(r_input,0.05,2)
radial.egger = as.data.table(egger$coef)
coef = c("Intercept", "Wj")
radial.egger = cbind(coef,radial.egger )
outliers = as.data.table(egger$outliers)
data = as.data.table(egger$data)
write.csv(radial.egger, "Results/MG.ebi-a-GCST002318.radial.egger.csv", row.names = F)
write.csv(outliers, "Results/MG.ebi-a-GCST002318.radial.egger.outliers.csv", row.names = F)
write.csv(data, "Results/MG.ebi-a-GCST002318.radial.egger.allsnps.csv", row.names = F)
R version 4.0.3 (2020-10-10) -- "Bunny-Wunnies Freak Out"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> #load packages
> library(data.table)
> library(TwoSampleMR)
> library(MRInstruments)
> library(RadialMR)
> library(ieugwasr)
> library(ggplot2)
> library(gplots)
> library(ggpubr)
> library(cowplot)
> library(svglite)
>
> MGtemp = fread("/data/NDRS_LNG/MyastheniaGravis/updated.April2020/MetaAnalysis.hg38.ByCohort.rerun/USmerged.Itals.hg38/META_MG.USmerged.Itals.UNRELATED.hg38.Rsq03.glm_filteredDirection.HetISq80MAF001cases.rsid.txt", header = T)
> head(MGtemp)
MarkerName CHROM POS OtherAllele EffectAllele maf_EA.CASE maf_EA.CTRL
1: 1:722408:C:G 1 722408 C G 0.16310731 0.15488040
2: 1:722700:G:A 1 722700 G A 0.01308062 0.01739071
3: 1:727233:G:A 1 727233 G A 0.01094501 0.01520484
4: 1:727242:G:A 1 727242 G A 0.08168713 0.09434974
5: 1:727717:C:G 1 727717 C G 0.14468767 0.13229310
6: 1:758351:A:G 1 758351 A G 0.08061933 0.09459720
beta StdErr P Direction HetISq rsID
1: 0.0594 0.0495 0.22990 -+ 68.7 rs75935175
2: -0.2150 0.1581 0.17400 -- 0.0 rs1415481957
3: -0.0362 0.1687 0.82990 +- 0.0 rs151190501
4: 0.1044 0.0661 0.11400 ++ 0.0 rs61769339
5: 0.1063 0.0522 0.04161 -- 50.8 rs61769340
6: 0.1025 0.0663 0.12210 -- 0.0 rs12238997
> MGtemp$SNP = MGtemp$rsID
> MGtemp$Freq1 = MGtemp$maf_EA.CASE
> MGtemp$effect_allele = MGtemp$EffectAllele
> MGtemp$other_allele = MGtemp$OtherAllele
> MGtemp$Beta = MGtemp$beta
> MGtemp$P.value = MGtemp$P
>
> Out_data = format_data(MGtemp,type="outcome", beta_col = "Beta", se_col = "StdErr", eaf_col = "Freq1", pval_col = "P.value")
>
> #the token will be different each time, it needs to be prepare at the moment
> token <- "ya29.a0AfH6SMD0mAzH9QTlCbaSvicyV5-9AmVSyMEvzIlN5KcgZB3Q867FlPRT90EwutMt33pxRvHsehVXWvvJ7xHKZQBp5kj7zDtZoeAg0xspeBOhVhQVXPaYyrdi6Yx8LMI-mZuEPzRIlq3xCRzn6dvuZl1LEVaYgA"
>
> #create a list with the GWAS id, this can be obtained from the ieu OpenGwas Project (https://gwas.mrcieu.ac.uk)
> listOfGwasIds <- read.table("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data/SignificantExposures.txt", header = T)
>
> # ebi-a-GCST002318
> for(i in 1:1)
+ {
+ instrumentId <- as.character(listOfGwasIds$id[i])
+ tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
+ print(tag)
+ flagged <- "nope"
+ Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
+ r2 = 0.001, kb = 10000, access_token = token,
+ force_server = TRUE)
+ skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
+ if(skip == 0)
+ {
+ dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
+ res <-mr(dat)
+ print(res)
+ het <- mr_heterogeneity(dat)
+ print(het)
+ ple <- mr_pleiotropy_test(dat)
+ print(ple)
+ #write.table(res, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.1res.txt",sep = ""), quote = F, sep = ",")
+ #write.table(het, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.2het.txt",sep = ""), quote = F, sep = ",")
+ #write.table(ple, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.3ple.txt",sep = ""), quote = F, sep = ",")
+ res_single <- mr_singlesnp(dat)
+ p2 <- mr_forest_plot(res_single)
+ #p2
+ #write.table(res_single, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.4res_single.txt",sep = ""), quote = F, sep = ",")
+ }
+ else
+ {
+ print("FAIL")
+ }
+ }
[1] "INSTRUMENT IS ebi-a-GCST002318 AT i = 1"
id.exposure id.outcome outcome
1 ebi-a-GCST002318 OJeEKA outcome
2 ebi-a-GCST002318 OJeEKA outcome
3 ebi-a-GCST002318 OJeEKA outcome
4 ebi-a-GCST002318 OJeEKA outcome
5 ebi-a-GCST002318 OJeEKA outcome
exposure method nsnp
1 Rheumatoid arthritis || id:ebi-a-GCST002318 MR Egger 46
2 Rheumatoid arthritis || id:ebi-a-GCST002318 Weighted median 46
3 Rheumatoid arthritis || id:ebi-a-GCST002318 Inverse variance weighted 46
4 Rheumatoid arthritis || id:ebi-a-GCST002318 Simple mode 46
5 Rheumatoid arthritis || id:ebi-a-GCST002318 Weighted mode 46
b se pval
1 0.3764539 0.15007957 1.588935e-02
2 0.3369727 0.08615844 9.188362e-05
3 0.3343471 0.07449680 7.187001e-06
4 0.2742467 0.19350427 1.632925e-01
5 0.4508013 0.12947491 1.119908e-03
id.exposure id.outcome outcome
1 ebi-a-GCST002318 OJeEKA outcome
2 ebi-a-GCST002318 OJeEKA outcome
exposure method
1 Rheumatoid arthritis || id:ebi-a-GCST002318 MR Egger
2 Rheumatoid arthritis || id:ebi-a-GCST002318 Inverse variance weighted
Q Q_df Q_pval
1 129.7949 44 2.134010e-10
2 130.1051 45 3.355752e-10
id.exposure id.outcome outcome
1 ebi-a-GCST002318 OJeEKA outcome
exposure egger_intercept se
1 Rheumatoid arthritis || id:ebi-a-GCST002318 -0.006956471 0.02145285
pval
1 0.7472718
>
> # Plots
>
> #res <- mr(dat) # to show only a few of them
> mr_scatter_plot(res, dat)
$`ebi-a-GCST002318.OJeEKA`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST002318 OJeEKA
> ggsave("Plots/MG.ebi-a-GCST002318.Scatter_plot.jpeg", width=7, height=7)
>
> mr_funnel_plot(res_single)
$`ebi-a-GCST002318.OJeEKA`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST002318 OJeEKA
> ggsave("Plots/MG.ebi-a-GCST002318.Funnel_plot.jpeg", width=7, height=7)
>
>
> res_single <- mr_singlesnp(dat)
> mr_forest_plot(res_single)
$`ebi-a-GCST002318.OJeEKA`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST002318 OJeEKA
> ggsave("Plots/MG.ebi-a-GCST002318.Forest_plot.singleSNP.jpeg", width=7, height=7)
>
>
>
>
> res_loo <- mr_leaveoneout(dat)
> write.table(res_loo, file = paste("Results/",instrumentId,"_MG.ebi-a-GCST002318.LOO.txt",sep = ""), quote = F, sep = ",")
> mr_forest_plot(res_loo)
$`ebi-a-GCST002318.OJeEKA`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST002318 OJeEKA
> ggsave("Plots/MG.ebi-a-GCST002318.Forest_plot.LOO.jpeg", width=7, height=7)
>
> #RADIAL analysis to detect and remove outliers
>
> r_input <- format_radial(BXG = dat$beta.exposure, BYG = dat$beta.outcome, seBXG = dat$se.exposure,seBYG =dat$se.outcome, RSID = dat$SNP)
> ivw <- ivw_radial(r_input,0.05,2)
Radial IVW
Estimate Std.Error t value Pr(>|t|)
Effect (2nd) 0.2251425 0.06187024 3.638946 2.737562e-04
Iterative 0.2453897 0.06564881 3.737916 1.855520e-04
Exact (FE) 0.2530041 0.03818153 6.626350 3.440899e-11
Exact (RE) 0.2480953 0.08734021 2.840562 6.414810e-03
Residual standard error: 1.599 on 52 degrees of freedom
F-statistic: 13.24 on 1 and 52 DF, p-value: 0.00063
Q-Statistic for heterogeneity: 133.0285 on 52 DF , p-value: 4.946921e-09
Outliers detected
Number of iterations = 3
> ivw.radial = as.data.table(ivw$coef)
> coef = c("Effect_2nd", "Iterative", "Exact_FE", "Exact_RE")
> ivw.radial = cbind(coef, ivw.radial)
> outliers = as.data.table(ivw$outliers)
> data = as.data.table(ivw$data)
>
> write.csv(ivw.radial, "Results/MG.ebi-a-GCST002318.radial.ivw.csv", row.names = F)
> write.csv(outliers, "Results/MG.ebi-a-GCST002318.radial.ivwr.outliers.csv", row.names = F)
> write.csv(data, "Results/MG.ebi-a-GCST002318.radial.ivw.allsnps.csv", row.names = F)
>
>
>
> egger <- egger_radial(r_input,0.05,2)
Radial MR-Egger
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.2209348 0.4543621 0.4862528 0.6288709
Wj 0.1708059 0.1279532 1.3349095 0.1878347
Residual standard error: 1.611 on 51 degrees of freedom
F-statistic: 1.78 on 1 and 51 DF, p-value: 0.188
Q-Statistic for heterogeneity: 132.4146 on 51 DF , p-value: 3.666467e-09
Outliers detected
> radial.egger = as.data.table(egger$coef)
> coef = c("Intercept", "Wj")
> radial.egger = cbind(coef,radial.egger )
> outliers = as.data.table(egger$outliers)
> data = as.data.table(egger$data)
> write.csv(radial.egger, "Results/MG.ebi-a-GCST002318.radial.egger.csv", row.names = F)
> write.csv(outliers, "Results/MG.ebi-a-GCST002318.radial.egger.outliers.csv", row.names = F)
> write.csv(data, "Results/MG.ebi-a-GCST002318.radial.egger.allsnps.csv", row.names = F)
>
>
>
bash: line 12: Module: command not found
TwoSampleMR version 0.5.5
[>] New: Option to use non-European LD reference panels for clumping etc
[>] Some studies temporarily quarantined to verify effect allele
[>] See news(package='TwoSampleMR') and https://gwas.mrcieu.ac.uk for further details
API: public: http://gwas-api.mrcieu.ac.uk/
Attaching package: ‘ieugwasr’
The following object is masked from ‘package:TwoSampleMR’:
ld_matrix
Attaching package: ‘gplots’
The following object is masked from ‘package:stats’:
lowess
Attaching package: ‘cowplot’
The following object is masked from ‘package:ggpubr’:
get_legend
No phenotype name specified, defaulting to 'outcome'.
Warning message:
In .fun(piece, ...) :
Duplicated SNPs present in exposure data for phenotype 'outcome. Just keeping the first instance:
rs145226353
rs116080943
rs79346640
rs5772990
rs145411679
rs142033339
rs150218993
rs59254935
rs58682971
rs113702600
rs114898411
rs1889702
rs11338667
rs76955023
rs140270114
rs80204938
rs1172532657
rs6703322
rs151105246
rs115847127
rs75815237
rs17419922
rs12078034
rs7553955
rs12040457
rs116379201
rs74713565
rs74854435
rs17544210
rs10465606
rs16838089
rs576963479
rs146567885
rs143857075
rs35741345
rs76302207
rs2609478
rs12029935
rs28535260
rs111961225
rs12079573
rs3767296
rs17188402
rs539821297
rs11581625
rs72431984
rs138610483
rs369158927
rs149604003
rs116532476
rs28715399
rs12081130
rs35380268
rs376075070
rs116683227
rs76439953
rs74765817
rs75813712
rs1345612268
rs371690583
rs12027758
rs7527025
rs116189815
rs75337743
rs72783268
rs56252443
rs117500436
rs6713932
rs57358917
rs163532
rs76224375
rs71423929
rs71407542
rs193249100
rs116755276
rs10490245
rs79656435
rs59532310
rs79491842
rs72827351
r [... truncated]
Harmonising Rheumatoid arthritis || id:ebi-a-GCST002318 (ebi-a-GCST002318) and outcome (OJeEKA)
Removing the following SNPs for being palindromic with intermediate allele frequencies:
rs13330176, rs141202333, rs168962, rs74956615, rs909685, rs9277956, rs9296009
Analysing 'ebi-a-GCST002318' on 'OJeEKA'
Warning message:
Ignoring unknown aesthetics: text
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/
Module load R/4.0.3
R --vanilla --no-save
#load packages
library(data.table)
library(TwoSampleMR)
library(MRInstruments)
library(RadialMR)
library(ieugwasr)
library(ggplot2)
library(gplots)
library(ggpubr)
library(cowplot)
library(svglite)
MGtemp = fread("/data/NDRS_LNG/MyastheniaGravis/updated.April2020/MetaAnalysis.hg38.ByCohort.rerun/USmerged.Itals.hg38/META_MG.USmerged.Itals.UNRELATED.hg38.Rsq03.glm_filteredDirection.HetISq80MAF001cases.rsid.txt", header = T)
head(MGtemp)
MGtemp$SNP = MGtemp$rsID
MGtemp$Freq1 = MGtemp$maf_EA.CASE
MGtemp$effect_allele = MGtemp$EffectAllele
MGtemp$other_allele = MGtemp$OtherAllele
MGtemp$Beta = MGtemp$beta
MGtemp$P.value = MGtemp$P
Out_data = format_data(MGtemp,type="outcome", beta_col = "Beta", se_col = "StdErr", eaf_col = "Freq1", pval_col = "P.value")
#the token will be different each time, it needs to be prepare at the moment
token <- "ya29.a0AfH6SMAo__Nj5QnvHSW_bn9MUwi_bxPo-EIMXXw0GOXlbTx-Q0LQxTvUFyo8QM0T9uQW_gQUHA5LIM4LPI0dnHcA5k6HSKh_vW-78muxmPlY3i68r95LzdQyxAVJIc_ftK1nS27z2D1uQvr8JSk5CG-_CHiQBQ"
#create a list with the GWAS id, this can be obtained from the ieu OpenGwas Project (https://gwas.mrcieu.ac.uk)
listOfGwasIds <- read.table("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data/SignificantExposures.txt", header = T)
# ebi-a-GCST005531
for(i in 2:2)
{
instrumentId <- as.character(listOfGwasIds$id[i])
tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
print(tag)
flagged <- "nope"
Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
r2 = 0.001, kb = 10000, access_token = token,
force_server = TRUE)
skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
if(skip == 0)
{
dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
res <-mr(dat)
print(res)
het <- mr_heterogeneity(dat)
print(het)
ple <- mr_pleiotropy_test(dat)
print(ple)
#write.table(res, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.1res.txt",sep = ""), quote = F, sep = ",")
#write.table(het, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.2het.txt",sep = ""), quote = F, sep = ",")
#write.table(ple, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.3ple.txt",sep = ""), quote = F, sep = ",")
res_single <- mr_singlesnp(dat)
p2 <- mr_forest_plot(res_single)
#p2
#write.table(res_single, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.4res_single.txt",sep = ""), quote = F, sep = ",")
}
else
{
print("FAIL")
}
}
# Plots
#res <- mr(dat) # to show only a few of them
mr_scatter_plot(res, dat)
ggsave("Plots/MG.ebi-a-GCST005531.Scatter_plot.jpeg", width=7, height=7)
mr_funnel_plot(res_single)
ggsave("Plots/MG.ebi-a-GCST005531.Funnel_plot.jpeg", width=7, height=7)
res_single <- mr_singlesnp(dat)
mr_forest_plot(res_single)
ggsave("Plots/MG.ebi-a-GCST005531.Forest_plot.singleSNP.jpeg", width=7, height=7)
res_loo <- mr_leaveoneout(dat)
write.table(res_loo, file = paste("Results/",instrumentId,"_MG.ebi-a-GCST002318.LOO.txt",sep = ""), quote = F, sep = ",")
mr_forest_plot(res_loo)
ggsave("Plots/MG.ebi-a-GCST005531.Forest_plot.LOO.jpeg", width=7, height=7)
#RADIAL analysis to detect and remove outliers
r_input <- format_radial(BXG = dat$beta.exposure, BYG = dat$beta.outcome, seBXG = dat$se.exposure,seBYG =dat$se.outcome, RSID = dat$SNP)
ivw <- ivw_radial(r_input,0.05,2)
ivw.radial = as.data.table(ivw$coef)
coef = c("Effect_2nd", "Iterative", "Exact_FE", "Exact_RE")
ivw.radial = cbind(coef, ivw.radial)
outliers = as.data.table(ivw$outliers)
data = as.data.table(ivw$data)
write.csv(ivw.radial, "Results/MG.ebi-a-GCST005531.radial.ivw.csv", row.names = F)
write.csv(outliers, "Results/MG.ebi-a-GCST005531.radial.ivwr.outliers.csv", row.names = F)
write.csv(data, "Results/MG.ebi-a-GCST005531.radial.ivw.allsnps.csv", row.names = F)
egger <- egger_radial(r_input,0.05,2)
radial.egger = as.data.table(egger$coef)
coef = c("Intercept", "Wj")
radial.egger = cbind(coef,radial.egger )
outliers = as.data.table(egger$outliers)
data = as.data.table(egger$data)
write.csv(radial.egger, "Results/MG.ebi-a-GCST005531.radial.egger.csv", row.names = F)
write.csv(outliers, "Results/MG.ebi-a-GCST005531.radial.egger.outliers.csv", row.names = F)
write.csv(data, "Results/MG.ebi-a-GCST005531.radial.egger.allsnps.csv", row.names = F)
#ebi-a-GCST005536
for(i in 3:3)
{
instrumentId <- as.character(listOfGwasIds$id[i])
tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
print(tag)
flagged <- "nope"
Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
r2 = 0.001, kb = 10000, access_token = token,
force_server = TRUE)
skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
if(skip == 0)
{
dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
res <-mr(dat)
print(res)
het <- mr_heterogeneity(dat)
print(het)
ple <- mr_pleiotropy_test(dat)
print(ple)
#write.table(res, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.1res.txt",sep = ""), quote = F, sep = ",")
#write.table(het, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.2het.txt",sep = ""), quote = F, sep = ",")
#write.table(ple, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.3ple.txt",sep = ""), quote = F, sep = ",")
res_single <- mr_singlesnp(dat)
p2 <- mr_forest_plot(res_single)
#p2
#write.table(res_single, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.4res_single.txt",sep = ""), quote = F, sep = ",")
}
else
{
print("FAIL")
}
}
# Plots
#res <- mr(dat) # to show only a few of them
mr_scatter_plot(res, dat)
ggsave("Plots/MG.ebi-a-GCST005536.Scatter_plot.jpeg", width=7, height=7)
mr_funnel_plot(res_single)
ggsave("Plots/MG.ebi-a-GCST005536.Funnel_plot.jpeg", width=7, height=7)
res_single <- mr_singlesnp(dat)
mr_forest_plot(res_single)
ggsave("Plots/MG.ebi-a-GCST005536.Forest_plot.singleSNP.jpeg", width=7, height=7)
res_loo <- mr_leaveoneout(dat)
write.table(res_loo, file = paste("Results/",instrumentId,"_MG.ebi-a-GCST005536.LOO.txt",sep = ""), quote = F, sep = ",")
mr_forest_plot(res_loo)
ggsave("Plots/MG.ebi-a-GCST005536.Forest_plot.LOO.jpeg", width=7, height=7)
#RADIAL analysis to detect and remove outliers
r_input <- format_radial(BXG = dat$beta.exposure, BYG = dat$beta.outcome, seBXG = dat$se.exposure,seBYG =dat$se.outcome, RSID = dat$SNP)
ivw <- ivw_radial(r_input,0.05,2)
ivw.radial = as.data.table(ivw$coef)
coef = c("Effect_2nd", "Iterative", "Exact_FE", "Exact_RE")
ivw.radial = cbind(coef, ivw.radial)
outliers = as.data.table(ivw$outliers)
data = as.data.table(ivw$data)
write.csv(ivw.radial, "Results/MG.ebi-a-GCST005536.radial.ivw.csv", row.names = F)
write.csv(outliers, "Results/MG.ebi-a-GCST005536.radial.ivwr.outliers.csv", row.names = F)
write.csv(data, "Results/MG.ebi-a-GCST005536.radial.ivw.allsnps.csv", row.names = F)
egger <- egger_radial(r_input,0.05,2)
radial.egger = as.data.table(egger$coef)
coef = c("Intercept", "Wj")
radial.egger = cbind(coef,radial.egger )
outliers = as.data.table(egger$outliers)
data = as.data.table(egger$data)
write.csv(radial.egger, "Results/MG.ebi-a-GCST005536.radial.egger.csv", row.names = F)
write.csv(outliers, "Results/MG.ebi-a-GCST005536.radial.egger.outliers.csv", row.names = F)
write.csv(data, "Results/MG.ebi-a-GCST005536.radial.egger.allsnps.csv", row.names = F)
# ukb-a-190
for(i in 4:4)
{
instrumentId <- as.character(listOfGwasIds$id[i])
tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
print(tag)
flagged <- "nope"
Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
r2 = 0.001, kb = 10000, access_token = token,
force_server = TRUE)
skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
if(skip == 0)
{
dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
res <-mr(dat)
print(res)
het <- mr_heterogeneity(dat)
print(het)
ple <- mr_pleiotropy_test(dat)
print(ple)
#write.table(res, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.1res.txt",sep = ""), quote = F, sep = ",")
#write.table(het, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.2het.txt",sep = ""), quote = F, sep = ",")
#write.table(ple, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.3ple.txt",sep = ""), quote = F, sep = ",")
res_single <- mr_singlesnp(dat)
p2 <- mr_forest_plot(res_single)
#p2
#write.table(res_single, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.4res_single.txt",sep = ""), quote = F, sep = ",")
}
else
{
print("FAIL")
}
}
# Plots
#res <- mr(dat) # to show only a few of them
mr_scatter_plot(res, dat)
ggsave("Plots/MG.ukb-a-190.Scatter_plot.jpeg", width=7, height=7)
mr_funnel_plot(res_single)
ggsave("Plots/MG.ukb-a-190.Funnel_plot.jpeg", width=7, height=7)
res_single <- mr_singlesnp(dat)
mr_forest_plot(res_single)
ggsave("Plots/MG.ukb-a-190.Forest_plot.singleSNP.jpeg", width=7, height=7)
res_loo <- mr_leaveoneout(dat)
write.table(res_loo, file = paste("Results/",instrumentId,"_MG.ukb-a-190.LOO.txt",sep = ""), quote = F, sep = ",")
mr_forest_plot(res_loo)
ggsave("Plots/MG.ukb-a-190.Forest_plot.LOO.jpeg", width=7, height=7)
#RADIAL analysis to detect and remove outliers
r_input <- format_radial(BXG = dat$beta.exposure, BYG = dat$beta.outcome, seBXG = dat$se.exposure,seBYG =dat$se.outcome, RSID = dat$SNP)
ivw <- ivw_radial(r_input,0.05,2)
ivw.radial = as.data.table(ivw$coef)
coef = c("Effect_2nd", "Iterative", "Exact_FE", "Exact_RE")
ivw.radial = cbind(coef, ivw.radial)
outliers = as.data.table(ivw$outliers)
data = as.data.table(ivw$data)
write.csv(ivw.radial, "Results/MG.ukb-a-190.radial.ivw.csv", row.names = F)
write.csv(outliers, "Results/MG.ukb-a-190.radial.ivwr.outliers.csv", row.names = F)
write.csv(data, "Results/MG.ukb-a-190.radial.ivw.allsnps.csv", row.names = F)
egger <- egger_radial(r_input,0.05,2)
radial.egger = as.data.table(egger$coef)
coef = c("Intercept", "Wj")
radial.egger = cbind(coef,radial.egger )
outliers = as.data.table(egger$outliers)
data = as.data.table(egger$data)
write.csv(radial.egger, "Results/MG.ukb-a-190.radial.egger.csv", row.names = F)
write.csv(outliers, "Results/MG.ukb-a-190.radial.egger.outliers.csv", row.names = F)
write.csv(data, "Results/MG.ukb-a-190.radial.egger.allsnps.csv", row.names = F)
# ukb-a-77
for(i in 5:5)
{
instrumentId <- as.character(listOfGwasIds$id[i])
tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
print(tag)
flagged <- "nope"
Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
r2 = 0.001, kb = 10000, access_token = token,
force_server = TRUE)
skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
if(skip == 0)
{
dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
res <-mr(dat)
print(res)
het <- mr_heterogeneity(dat)
print(het)
ple <- mr_pleiotropy_test(dat)
print(ple)
#write.table(res, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.1res.txt",sep = ""), quote = F, sep = ",")
#write.table(het, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.2het.txt",sep = ""), quote = F, sep = ",")
#write.table(ple, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.3ple.txt",sep = ""), quote = F, sep = ",")
res_single <- mr_singlesnp(dat)
p2 <- mr_forest_plot(res_single)
#p2
#write.table(res_single, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.4res_single.txt",sep = ""), quote = F, sep = ",")
}
else
{
print("FAIL")
}
}
# Plots
#res <- mr(dat) # to show only a few of them
mr_scatter_plot(res, dat)
ggsave("Plots/MG.ukb-a-77.Scatter_plot.jpeg", width=7, height=7)
mr_funnel_plot(res_single)
ggsave("Plots/MG.ukb-a-77.Funnel_plot.jpeg", width=7, height=7)
res_single <- mr_singlesnp(dat)
mr_forest_plot(res_single)
ggsave("Plots/MG.ukb-a-77.Forest_plot.singleSNP.jpeg", width=7, height=7)
res_loo <- mr_leaveoneout(dat)
write.table(res_loo, file = paste("Results/",instrumentId,"_MG.ukb-a-77.LOO.txt",sep = ""), quote = F, sep = ",")
mr_forest_plot(res_loo)
ggsave("Plots/MG.ukb-a-77.Forest_plot.LOO.jpeg", width=7, height=7)
#RADIAL analysis to detect and remove outliers
r_input <- format_radial(BXG = dat$beta.exposure, BYG = dat$beta.outcome, seBXG = dat$se.exposure,seBYG =dat$se.outcome, RSID = dat$SNP)
ivw <- ivw_radial(r_input,0.05,2)
ivw.radial = as.data.table(ivw$coef)
coef = c("Effect_2nd", "Iterative", "Exact_FE", "Exact_RE")
ivw.radial = cbind(coef, ivw.radial)
outliers = as.data.table(ivw$outliers)
data = as.data.table(ivw$data)
write.csv(ivw.radial, "Results/MG.ukb-a-77.radial.ivw.csv", row.names = F)
write.csv(outliers, "Results/MG.ukb-a-77.radial.ivwr.outliers.csv", row.names = F)
write.csv(data, "Results/MG.ukb-a-77.radial.ivw.allsnps.csv", row.names = F)
egger <- egger_radial(r_input,0.05,2)
radial.egger = as.data.table(egger$coef)
coef = c("Intercept", "Wj")
radial.egger = cbind(coef,radial.egger )
outliers = as.data.table(egger$outliers)
data = as.data.table(egger$data)
write.csv(radial.egger, "Results/MG.ukb-a-77.radial.egger.csv", row.names = F)
write.csv(outliers, "Results/MG.ukb-a-77.radial.egger.outliers.csv", row.names = F)
write.csv(data, "Results/MG.ukb-a-77.radial.egger.allsnps.csv", row.names = F)
R version 4.0.3 (2020-10-10) -- "Bunny-Wunnies Freak Out"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> #load packages
> library(data.table)
> library(TwoSampleMR)
> library(MRInstruments)
> library(RadialMR)
> library(ieugwasr)
> library(ggplot2)
> library(gplots)
> library(ggpubr)
> library(cowplot)
> library(svglite)
> MGtemp = fread("/data/NDRS_LNG/MyastheniaGravis/updated.April2020/MetaAnalysis.hg38.ByCohort.rerun/USmerged.Itals.hg38/META_MG.USmerged.Itals.UNRELATED.hg38.Rsq03.glm_filteredDirection.HetISq80MAF001cases.rsid.txt", header = T)
> head(MGtemp)
MarkerName CHROM POS OtherAllele EffectAllele maf_EA.CASE maf_EA.CTRL
1: 1:722408:C:G 1 722408 C G 0.16310731 0.15488040
2: 1:722700:G:A 1 722700 G A 0.01308062 0.01739071
3: 1:727233:G:A 1 727233 G A 0.01094501 0.01520484
4: 1:727242:G:A 1 727242 G A 0.08168713 0.09434974
5: 1:727717:C:G 1 727717 C G 0.14468767 0.13229310
6: 1:758351:A:G 1 758351 A G 0.08061933 0.09459720
beta StdErr P Direction HetISq rsID
1: 0.0594 0.0495 0.22990 -+ 68.7 rs75935175
2: -0.2150 0.1581 0.17400 -- 0.0 rs1415481957
3: -0.0362 0.1687 0.82990 +- 0.0 rs151190501
4: 0.1044 0.0661 0.11400 ++ 0.0 rs61769339
5: 0.1063 0.0522 0.04161 -- 50.8 rs61769340
6: 0.1025 0.0663 0.12210 -- 0.0 rs12238997
> MGtemp$SNP = MGtemp$rsID
> MGtemp$Freq1 = MGtemp$maf_EA.CASE
> MGtemp$effect_allele = MGtemp$EffectAllele
> MGtemp$other_allele = MGtemp$OtherAllele
> MGtemp$Beta = MGtemp$beta
> MGtemp$P.value = MGtemp$P
>
> Out_data = format_data(MGtemp,type="outcome", beta_col = "Beta", se_col = "StdErr", eaf_col = "Freq1", pval_col = "P.value")
>
> #the token will be different each time, it needs to be prepare at the moment
> token <- "ya29.a0AfH6SMAo__Nj5QnvHSW_bn9MUwi_bxPo-EIMXXw0GOXlbTx-Q0LQxTvUFyo8QM0T9uQW_gQUHA5LIM4LPI0dnHcA5k6HSKh_vW-78muxmPlY3i68r95LzdQyxAVJIc_ftK1nS27z2D1uQvr8JSk5CG-_CHiQBQ"
> #create a list with the GWAS id, this can be obtained from the ieu OpenGwas Project (https://gwas.mrcieu.ac.uk)
> listOfGwasIds <- read.table("/data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Data/SignificantExposures.txt", header = T)
>
> # ebi-a-GCST005531
>
> for(i in 2:2)
+ {
+ instrumentId <- as.character(listOfGwasIds$id[i])
+ tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
+ print(tag)
+ flagged <- "nope"
+ Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
+ r2 = 0.001, kb = 10000, access_token = token,
+ force_server = TRUE)
+ skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
+ if(skip == 0)
+ {
+ dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
+ res <-mr(dat)
+ print(res)
+ het <- mr_heterogeneity(dat)
+ print(het)
+ ple <- mr_pleiotropy_test(dat)
+ print(ple)
+ #write.table(res, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.1res.txt",sep = ""), quote = F, sep = ",")
+ #write.table(het, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.2het.txt",sep = ""), quote = F, sep = ",")
+ #write.table(ple, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.3ple.txt",sep = ""), quote = F, sep = ",")
+ res_single <- mr_singlesnp(dat)
+ p2 <- mr_forest_plot(res_single)
+ #p2
+ #write.table(res_single, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.4res_single.txt",sep = ""), quote = F, sep = ",")
+ }
+ else
+ {
+ print("FAIL")
+ }
+ }
[1] "INSTRUMENT IS ebi-a-GCST005531 AT i = 2"
id.exposure id.outcome outcome exposure
1 ebi-a-GCST005531 cYXqbz outcome Multiple sclerosis || id:ebi-a-GCST005531
2 ebi-a-GCST005531 cYXqbz outcome Multiple sclerosis || id:ebi-a-GCST005531
3 ebi-a-GCST005531 cYXqbz outcome Multiple sclerosis || id:ebi-a-GCST005531
4 ebi-a-GCST005531 cYXqbz outcome Multiple sclerosis || id:ebi-a-GCST005531
5 ebi-a-GCST005531 cYXqbz outcome Multiple sclerosis || id:ebi-a-GCST005531
method nsnp b se pval
1 MR Egger 43 0.1697660 0.07595469 0.03092028
2 Weighted median 43 0.1273575 0.06104784 0.03696126
3 Inverse variance weighted 43 0.1457677 0.04086487 0.00036100
4 Simple mode 43 0.1765939 0.14149119 0.21891104
5 Weighted mode 43 0.1448459 0.06395989 0.02875682
id.exposure id.outcome outcome exposure
1 ebi-a-GCST005531 cYXqbz outcome Multiple sclerosis || id:ebi-a-GCST005531
2 ebi-a-GCST005531 cYXqbz outcome Multiple sclerosis || id:ebi-a-GCST005531
method Q Q_df Q_pval
1 MR Egger 49.27712 41 0.1758103
2 Inverse variance weighted 49.44743 42 0.2002339
id.exposure id.outcome outcome exposure
1 ebi-a-GCST005531 cYXqbz outcome Multiple sclerosis || id:ebi-a-GCST005531
egger_intercept se pval
1 -0.005026082 0.01335195 0.708538
>
> # Plots
>
> #res <- mr(dat) # to show only a few of them
> mr_scatter_plot(res, dat)
$`ebi-a-GCST005531.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST005531 cYXqbz
> ggsave("Plots/MG.ebi-a-GCST005531.Scatter_plot.jpeg", width=7, height=7)
>
> mr_funnel_plot(res_single)
$`ebi-a-GCST005531.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST005531 cYXqbz
> ggsave("Plots/MG.ebi-a-GCST005531.Funnel_plot.jpeg", width=7, height=7)
>
>
> res_single <- mr_singlesnp(dat)
> mr_forest_plot(res_single)
$`ebi-a-GCST005531.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST005531 cYXqbz
> ggsave("Plots/MG.ebi-a-GCST005531.Forest_plot.singleSNP.jpeg", width=7, height=7)
>
>
>
>
> res_loo <- mr_leaveoneout(dat)
> write.table(res_loo, file = paste("Results/",instrumentId,"_MG.ebi-a-GCST002318.LOO.txt",sep = ""), quote = F, sep = ",")
> mr_forest_plot(res_loo)
$`ebi-a-GCST005531.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST005531 cYXqbz
> ggsave("Plots/MG.ebi-a-GCST005531.Forest_plot.LOO.jpeg", width=7, height=7)
>
> #RADIAL analysis to detect and remove outliers
>
> r_input <- format_radial(BXG = dat$beta.exposure, BYG = dat$beta.outcome, seBXG = dat$se.exposure,seBYG =dat$se.outcome, RSID = dat$SNP)
> ivw <- ivw_radial(r_input,0.05,2)
Radial IVW
Estimate Std.Error t value Pr(>|t|)
Effect (2nd) 0.1424691 0.04020715 3.543378 3.950365e-04
Iterative 0.1457614 0.04085819 3.567495 3.604107e-04
Exact (FE) 0.1478895 0.03774042 3.918597 8.906586e-05
Exact (RE) 0.1475745 0.04610665 3.200720 2.611584e-03
Residual standard error: 1.058 on 42 degrees of freedom
F-statistic: 12.56 on 1 and 42 DF, p-value: 0.000984
Q-Statistic for heterogeneity: 47.02492 on 42 DF , p-value: 0.2742754
Outliers detected
Number of iterations = 2
> ivw.radial = as.data.table(ivw$coef)
> coef = c("Effect_2nd", "Iterative", "Exact_FE", "Exact_RE")
> ivw.radial = cbind(coef, ivw.radial)
> outliers = as.data.table(ivw$outliers)
> data = as.data.table(ivw$data)
>
> write.csv(ivw.radial, "Results/MG.ebi-a-GCST005531.radial.ivw.csv", row.names = F)
> write.csv(outliers, "Results/MG.ebi-a-GCST005531.radial.ivwr.outliers.csv", row.names = F)
> write.csv(data, "Results/MG.ebi-a-GCST005531.radial.ivw.allsnps.csv", row.names = F)
>
>
>
> egger <- egger_radial(r_input,0.05,2)
Radial MR-Egger
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.2870442 0.32851096 -0.8737736 0.38733088
Wj 0.2047132 0.08185552 2.5009086 0.01647612
Residual standard error: 1.061 on 41 degrees of freedom
F-statistic: 6.25 on 1 and 41 DF, p-value: 0.0165
Q-Statistic for heterogeneity: 46.16525 on 41 DF , p-value: 0.2672873
Outliers detected
> radial.egger = as.data.table(egger$coef)
> coef = c("Intercept", "Wj")
> radial.egger = cbind(coef,radial.egger )
> outliers = as.data.table(egger$outliers)
> data = as.data.table(egger$data)
> write.csv(radial.egger, "Results/MG.ebi-a-GCST005531.radial.egger.csv", row.names = F)
> write.csv(outliers, "Results/MG.ebi-a-GCST005531.radial.egger.outliers.csv", row.names = F)
> write.csv(data, "Results/MG.ebi-a-GCST005531.radial.egger.allsnps.csv", row.names = F)
>
> #ebi-a-GCST005536
> for(i in 3:3)
+ {
+ instrumentId <- as.character(listOfGwasIds$id[i])
+ tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
+ print(tag)
+ flagged <- "nope"
+ Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
+ r2 = 0.001, kb = 10000, access_token = token,
+ force_server = TRUE)
+ skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
+ if(skip == 0)
+ {
+ dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
+ res <-mr(dat)
+ print(res)
+ het <- mr_heterogeneity(dat)
+ print(het)
+ ple <- mr_pleiotropy_test(dat)
+ print(ple)
+ #write.table(res, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.1res.txt",sep = ""), quote = F, sep = ",")
+ #write.table(het, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.2het.txt",sep = ""), quote = F, sep = ",")
+ #write.table(ple, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.3ple.txt",sep = ""), quote = F, sep = ",")
+ res_single <- mr_singlesnp(dat)
+ p2 <- mr_forest_plot(res_single)
+ #p2
+ #write.table(res_single, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.4res_single.txt",sep = ""), quote = F, sep = ",")
+ }
+ else
+ {
+ print("FAIL")
+ }
+ }
[1] "INSTRUMENT IS ebi-a-GCST005536 AT i = 3"
id.exposure id.outcome outcome exposure
1 ebi-a-GCST005536 cYXqbz outcome Type 1 diabetes || id:ebi-a-GCST005536
2 ebi-a-GCST005536 cYXqbz outcome Type 1 diabetes || id:ebi-a-GCST005536
3 ebi-a-GCST005536 cYXqbz outcome Type 1 diabetes || id:ebi-a-GCST005536
4 ebi-a-GCST005536 cYXqbz outcome Type 1 diabetes || id:ebi-a-GCST005536
5 ebi-a-GCST005536 cYXqbz outcome Type 1 diabetes || id:ebi-a-GCST005536
method nsnp b se pval
1 MR Egger 30 0.3086710 0.11678594 1.330352e-02
2 Weighted median 30 0.1874440 0.05597493 8.118764e-04
3 Inverse variance weighted 30 0.3003068 0.06644808 6.200911e-06
4 Simple mode 30 0.3209438 0.13203412 2.148249e-02
5 Weighted mode 30 0.2321730 0.06097785 6.729821e-04
id.exposure id.outcome outcome exposure
1 ebi-a-GCST005536 cYXqbz outcome Type 1 diabetes || id:ebi-a-GCST005536
2 ebi-a-GCST005536 cYXqbz outcome Type 1 diabetes || id:ebi-a-GCST005536
method Q Q_df Q_pval
1 MR Egger 99.55348 28 5.982329e-10
2 Inverse variance weighted 99.58091 29 1.141577e-09
id.exposure id.outcome outcome exposure
1 ebi-a-GCST005536 cYXqbz outcome Type 1 diabetes || id:ebi-a-GCST005536
egger_intercept se pval
1 -0.002250609 0.02562195 0.9306298
>
> # Plots
>
> #res <- mr(dat) # to show only a few of them
> mr_scatter_plot(res, dat)
$`ebi-a-GCST005536.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST005536 cYXqbz
> ggsave("Plots/MG.ebi-a-GCST005536.Scatter_plot.jpeg", width=7, height=7)
>
> mr_funnel_plot(res_single)
$`ebi-a-GCST005536.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST005536 cYXqbz
> ggsave("Plots/MG.ebi-a-GCST005536.Funnel_plot.jpeg", width=7, height=7)
>
>
> res_single <- mr_singlesnp(dat)
> mr_forest_plot(res_single)
$`ebi-a-GCST005536.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST005536 cYXqbz
> ggsave("Plots/MG.ebi-a-GCST005536.Forest_plot.singleSNP.jpeg", width=7, height=7)
>
>
>
>
> res_loo <- mr_leaveoneout(dat)
> write.table(res_loo, file = paste("Results/",instrumentId,"_MG.ebi-a-GCST005536.LOO.txt",sep = ""), quote = F, sep = ",")
> mr_forest_plot(res_loo)
$`ebi-a-GCST005536.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ebi-a-GCST005536 cYXqbz
> ggsave("Plots/MG.ebi-a-GCST005536.Forest_plot.LOO.jpeg", width=7, height=7)
>
> #RADIAL analysis to detect and remove outliers
>
> r_input <- format_radial(BXG = dat$beta.exposure, BYG = dat$beta.outcome, seBXG = dat$se.exposure,seBYG =dat$se.outcome, RSID = dat$SNP)
> ivw <- ivw_radial(r_input,0.05,2)
Radial IVW
Estimate Std.Error t value Pr(>|t|)
Effect (2nd) 0.2840693 0.06240785 4.551820 5.318378e-06
Iterative 0.3006955 0.06649085 4.522360 6.115404e-06
Exact (FE) 0.3105280 0.03631475 8.551015 1.220108e-17
Exact (RE) 0.3032667 0.08048099 3.768178 7.478791e-04
Residual standard error: 1.701 on 29 degrees of freedom
F-statistic: 20.72 on 1 and 29 DF, p-value: 8.8e-05
Q-Statistic for heterogeneity: 83.88947 on 29 DF , p-value: 3.079136e-07
Outliers detected
Number of iterations = 3
> ivw.radial = as.data.table(ivw$coef)
> coef = c("Effect_2nd", "Iterative", "Exact_FE", "Exact_RE")
> ivw.radial = cbind(coef, ivw.radial)
> outliers = as.data.table(ivw$outliers)
> data = as.data.table(ivw$data)
>
> write.csv(ivw.radial, "Results/MG.ebi-a-GCST005536.radial.ivw.csv", row.names = F)
> write.csv(outliers, "Results/MG.ebi-a-GCST005536.radial.ivwr.outliers.csv", row.names = F)
> write.csv(data, "Results/MG.ebi-a-GCST005536.radial.ivw.allsnps.csv", row.names = F)
>
>
>
> egger <- egger_radial(r_input,0.05,2)
Radial MR-Egger
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1927063 0.5696266 0.3383028 0.73765866
Wj 0.2518175 0.1144815 2.1996343 0.03625414
Residual standard error: 1.727 on 28 degrees of freedom
F-statistic: 4.84 on 1 and 28 DF, p-value: 0.0363
Q-Statistic for heterogeneity: 83.54797 on 28 DF , p-value: 1.954356e-07
Outliers detected
> radial.egger = as.data.table(egger$coef)
> coef = c("Intercept", "Wj")
> radial.egger = cbind(coef,radial.egger )
> outliers = as.data.table(egger$outliers)
> data = as.data.table(egger$data)
> write.csv(radial.egger, "Results/MG.ebi-a-GCST005536.radial.egger.csv", row.names = F)
> write.csv(outliers, "Results/MG.ebi-a-GCST005536.radial.egger.outliers.csv", row.names = F)
> write.csv(data, "Results/MG.ebi-a-GCST005536.radial.egger.allsnps.csv", row.names = F)
>
>
>
> # ukb-a-190
>
> for(i in 4:4)
+ {
+ instrumentId <- as.character(listOfGwasIds$id[i])
+ tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
+ print(tag)
+ flagged <- "nope"
+ Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
+ r2 = 0.001, kb = 10000, access_token = token,
+ force_server = TRUE)
+ skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
+ if(skip == 0)
+ {
+ dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
+ res <-mr(dat)
+ print(res)
+ het <- mr_heterogeneity(dat)
+ print(het)
+ ple <- mr_pleiotropy_test(dat)
+ print(ple)
+ #write.table(res, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.1res.txt",sep = ""), quote = F, sep = ",")
+ #write.table(het, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.2het.txt",sep = ""), quote = F, sep = ",")
+ #write.table(ple, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.3ple.txt",sep = ""), quote = F, sep = ",")
+ res_single <- mr_singlesnp(dat)
+ p2 <- mr_forest_plot(res_single)
+ #p2
+ #write.table(res_single, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.4res_single.txt",sep = ""), quote = F, sep = ",")
+ }
+ else
+ {
+ print("FAIL")
+ }
+ }
[1] "INSTRUMENT IS ukb-a-190 AT i = 4"
id.exposure id.outcome outcome
1 ukb-a-190 cYXqbz outcome
2 ukb-a-190 cYXqbz outcome
3 ukb-a-190 cYXqbz outcome
4 ukb-a-190 cYXqbz outcome
5 ukb-a-190 cYXqbz outcome
exposure
1 Treatment/medication code: levothyroxine sodium || id:ukb-a-190
2 Treatment/medication code: levothyroxine sodium || id:ukb-a-190
3 Treatment/medication code: levothyroxine sodium || id:ukb-a-190
4 Treatment/medication code: levothyroxine sodium || id:ukb-a-190
5 Treatment/medication code: levothyroxine sodium || id:ukb-a-190
method nsnp b se pval
1 MR Egger 47 12.735893 6.148272 4.407815e-02
2 Weighted median 47 7.277228 2.430490 2.752191e-03
3 Inverse variance weighted 47 10.934091 2.381289 4.397138e-06
4 Simple mode 47 5.618300 5.952328 3.501609e-01
5 Weighted mode 47 3.587163 7.400761 6.301879e-01
id.exposure id.outcome outcome
1 ukb-a-190 cYXqbz outcome
2 ukb-a-190 cYXqbz outcome
exposure
1 Treatment/medication code: levothyroxine sodium || id:ukb-a-190
2 Treatment/medication code: levothyroxine sodium || id:ukb-a-190
method Q Q_df Q_pval
1 MR Egger 139.6972 45 1.240090e-11
2 Inverse variance weighted 140.0120 46 1.990366e-11
id.exposure id.outcome outcome
1 ukb-a-190 cYXqbz outcome
exposure
1 Treatment/medication code: levothyroxine sodium || id:ukb-a-190
egger_intercept se pval
1 -0.008787168 0.02759543 0.7516332
>
> # Plots
>
> #res <- mr(dat) # to show only a few of them
> mr_scatter_plot(res, dat)
$`ukb-a-190.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ukb-a-190 cYXqbz
> ggsave("Plots/MG.ukb-a-190.Scatter_plot.jpeg", width=7, height=7)
>
> mr_funnel_plot(res_single)
$`ukb-a-190.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ukb-a-190 cYXqbz
> ggsave("Plots/MG.ukb-a-190.Funnel_plot.jpeg", width=7, height=7)
>
>
> res_single <- mr_singlesnp(dat)
> mr_forest_plot(res_single)
$`ukb-a-190.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ukb-a-190 cYXqbz
> ggsave("Plots/MG.ukb-a-190.Forest_plot.singleSNP.jpeg", width=7, height=7)
>
>
>
>
> res_loo <- mr_leaveoneout(dat)
> write.table(res_loo, file = paste("Results/",instrumentId,"_MG.ukb-a-190.LOO.txt",sep = ""), quote = F, sep = ",")
> mr_forest_plot(res_loo)
$`ukb-a-190.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ukb-a-190 cYXqbz
> ggsave("Plots/MG.ukb-a-190.Forest_plot.LOO.jpeg", width=7, height=7)
>
> #RADIAL analysis to detect and remove outliers
>
> r_input <- format_radial(BXG = dat$beta.exposure, BYG = dat$beta.outcome, seBXG = dat$se.exposure,seBYG =dat$se.outcome, RSID = dat$SNP)
> ivw <- ivw_radial(r_input,0.05,2)
Radial IVW
Estimate Std.Error t value Pr(>|t|)
Effect (2nd) 9.889822 2.274641 4.347861 1.374717e-05
Iterative 10.934084 2.381355 4.591540 4.399878e-06
Exact (FE) 1.999940 1.365400 1.464728 1.429951e-01
Exact (RE) 11.099646 2.852497 3.891204 3.197289e-04
Residual standard error: 1.62 on 46 degrees of freedom
F-statistic: 18.9 on 1 and 46 DF, p-value: 7.54e-05
Q-Statistic for heterogeneity: 120.7264 on 46 DF , p-value: 1.263848e-08
Outliers detected
Number of iterations = 2
> ivw.radial = as.data.table(ivw$coef)
> coef = c("Effect_2nd", "Iterative", "Exact_FE", "Exact_RE")
> ivw.radial = cbind(coef, ivw.radial)
> outliers = as.data.table(ivw$outliers)
> data = as.data.table(ivw$data)
>
> write.csv(ivw.radial, "Results/MG.ukb-a-190.radial.ivw.csv", row.names = F)
> write.csv(outliers, "Results/MG.ukb-a-190.radial.ivwr.outliers.csv", row.names = F)
> write.csv(data, "Results/MG.ukb-a-190.radial.ivw.allsnps.csv", row.names = F)
>
>
>
> egger <- egger_radial(r_input,0.05,2)
Radial MR-Egger
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1636549 0.719393 0.2274902 0.8210730
Wj 8.4038105 6.924778 1.2135855 0.2312375
Residual standard error: 1.637 on 45 degrees of freedom
F-statistic: 1.47 on 1 and 45 DF, p-value: 0.231
Q-Statistic for heterogeneity: 120.5877 on 45 DF , p-value: 7.930066e-09
Outliers detected
> radial.egger = as.data.table(egger$coef)
> coef = c("Intercept", "Wj")
> radial.egger = cbind(coef,radial.egger )
> outliers = as.data.table(egger$outliers)
> data = as.data.table(egger$data)
> write.csv(radial.egger, "Results/MG.ukb-a-190.radial.egger.csv", row.names = F)
> write.csv(outliers, "Results/MG.ukb-a-190.radial.egger.outliers.csv", row.names = F)
> write.csv(data, "Results/MG.ukb-a-190.radial.egger.allsnps.csv", row.names = F)
>
>
> # ukb-a-77
>
> for(i in 5:5)
+ {
+ instrumentId <- as.character(listOfGwasIds$id[i])
+ tag <- paste("INSTRUMENT IS ",instrumentId," AT i = ",i, sep = "")
+ print(tag)
+ flagged <- "nope"
+ Exp_data <- extract_instruments(outcomes=instrumentId, p1 = 5e-08, clump = TRUE, p2 = 5e-08,
+ r2 = 0.001, kb = 10000, access_token = token,
+ force_server = TRUE)
+ skip <- ifelse(length(Exp_data$beta.exposure) < 1, 1, 0)
+ if(skip == 0)
+ {
+ dat <- harmonise_data(exposure_dat=Exp_data, outcome_dat=Out_data, action=2)
+ res <-mr(dat)
+ print(res)
+ het <- mr_heterogeneity(dat)
+ print(het)
+ ple <- mr_pleiotropy_test(dat)
+ print(ple)
+ #write.table(res, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.1res.txt",sep = ""), quote = F, sep = ",")
+ #write.table(het, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.2het.txt",sep = ""), quote = F, sep = ",")
+ #write.table(ple, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.3ple.txt",sep = ""), quote = F, sep = ",")
+ res_single <- mr_singlesnp(dat)
+ p2 <- mr_forest_plot(res_single)
+ #p2
+ #write.table(res_single, file = paste("Results/",instrumentId,"_LBD_cardiovascular27.4res_single.txt",sep = ""), quote = F, sep = ",")
+ }
+ else
+ {
+ print("FAIL")
+ }
+ }
[1] "INSTRUMENT IS ukb-a-77 AT i = 5"
id.exposure id.outcome outcome
1 ukb-a-77 cYXqbz outcome
2 ukb-a-77 cYXqbz outcome
3 ukb-a-77 cYXqbz outcome
4 ukb-a-77 cYXqbz outcome
5 ukb-a-77 cYXqbz outcome
exposure
1 Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
2 Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
3 Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
4 Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
5 Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
method nsnp b se pval
1 MR Egger 67 9.449497 3.908585 1.843526e-02
2 Weighted median 67 8.954881 1.959963 4.902813e-06
3 Inverse variance weighted 67 9.061093 1.643453 3.518703e-08
4 Simple mode 67 8.159686 5.154464 1.181945e-01
5 Weighted mode 67 9.714673 8.859123 2.768132e-01
id.exposure id.outcome outcome
1 ukb-a-77 cYXqbz outcome
2 ukb-a-77 cYXqbz outcome
exposure
1 Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
2 Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
method Q Q_df Q_pval
1 MR Egger 166.8063 65 6.777066e-11
2 Inverse variance weighted 166.8372 66 1.089483e-10
id.exposure id.outcome outcome
1 ukb-a-77 cYXqbz outcome
exposure
1 Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77
egger_intercept se pval
1 -0.002169242 0.01977369 0.9129826
>
> # Plots
>
> #res <- mr(dat) # to show only a few of them
> mr_scatter_plot(res, dat)
$`ukb-a-77.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ukb-a-77 cYXqbz
> ggsave("Plots/MG.ukb-a-77.Scatter_plot.jpeg", width=7, height=7)
>
> mr_funnel_plot(res_single)
$`ukb-a-77.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ukb-a-77 cYXqbz
> ggsave("Plots/MG.ukb-a-77.Funnel_plot.jpeg", width=7, height=7)
>
>
> res_single <- mr_singlesnp(dat)
> mr_forest_plot(res_single)
$`ukb-a-77.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ukb-a-77 cYXqbz
> ggsave("Plots/MG.ukb-a-77.Forest_plot.singleSNP.jpeg", width=7, height=7)
>
>
>
>
> res_loo <- mr_leaveoneout(dat)
> write.table(res_loo, file = paste("Results/",instrumentId,"_MG.ukb-a-77.LOO.txt",sep = ""), quote = F, sep = ",")
> mr_forest_plot(res_loo)
$`ukb-a-77.cYXqbz`
attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
id.exposure id.outcome
1 ukb-a-77 cYXqbz
> ggsave("Plots/MG.ukb-a-77.Forest_plot.LOO.jpeg", width=7, height=7)
>
> #RADIAL analysis to detect and remove outliers
>
> r_input <- format_radial(BXG = dat$beta.exposure, BYG = dat$beta.outcome, seBXG = dat$se.exposure,seBYG =dat$se.outcome, RSID = dat$SNP)
> ivw <- ivw_radial(r_input,0.05,2)
Radial IVW
Estimate Std.Error t value Pr(>|t|)
Effect (2nd) 8.315636 1.562361 5.322480 1.023620e-07
Iterative 9.027182 1.617545 5.580792 2.394261e-08
Exact (FE) 1.999940 1.025252 1.950681 5.109505e-02
Exact (RE) 9.160281 2.199050 4.165563 9.085086e-05
Residual standard error: 1.489 on 67 degrees of freedom
F-statistic: 28.33 on 1 and 67 DF, p-value: 1.28e-06
Q-Statistic for heterogeneity: 148.5887 on 67 DF , p-value: 4.027853e-08
Outliers detected
Number of iterations = 3
> ivw.radial = as.data.table(ivw$coef)
> coef = c("Effect_2nd", "Iterative", "Exact_FE", "Exact_RE")
> ivw.radial = cbind(coef, ivw.radial)
> outliers = as.data.table(ivw$outliers)
> data = as.data.table(ivw$data)
>
> write.csv(ivw.radial, "Results/MG.ukb-a-77.radial.ivw.csv", row.names = F)
> write.csv(outliers, "Results/MG.ukb-a-77.radial.ivwr.outliers.csv", row.names = F)
> write.csv(data, "Results/MG.ukb-a-77.radial.ivw.allsnps.csv", row.names = F)
>
>
>
> egger <- egger_radial(r_input,0.05,2)
Radial MR-Egger
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.01829303 0.4977249 0.0367533 0.97079265
Wj 8.16833227 4.3059583 1.8969836 0.06220698
Residual standard error: 1.5 on 66 degrees of freedom
F-statistic: 3.6 on 1 and 66 DF, p-value: 0.0622
Q-Statistic for heterogeneity: 148.5856 on 66 DF , p-value: 2.646655e-08
Outliers detected
> radial.egger = as.data.table(egger$coef)
> coef = c("Intercept", "Wj")
> radial.egger = cbind(coef,radial.egger )
> outliers = as.data.table(egger$outliers)
> data = as.data.table(egger$data)
> write.csv(radial.egger, "Results/MG.ukb-a-77.radial.egger.csv", row.names = F)
> write.csv(outliers, "Results/MG.ukb-a-77.radial.egger.outliers.csv", row.names = F)
> write.csv(data, "Results/MG.ukb-a-77.radial.egger.allsnps.csv", row.names = F)
>
>
>
>
>
>
bash: line 3: Module: command not found
TwoSampleMR version 0.5.5
[>] New: Option to use non-European LD reference panels for clumping etc
[>] Some studies temporarily quarantined to verify effect allele
[>] See news(package='TwoSampleMR') and https://gwas.mrcieu.ac.uk for further details
API: public: http://gwas-api.mrcieu.ac.uk/
Attaching package: ‘ieugwasr’
The following object is masked from ‘package:TwoSampleMR’:
ld_matrix
Attaching package: ‘gplots’
The following object is masked from ‘package:stats’:
lowess
Attaching package: ‘cowplot’
The following object is masked from ‘package:ggpubr’:
get_legend
No phenotype name specified, defaulting to 'outcome'.
Warning message:
In .fun(piece, ...) :
Duplicated SNPs present in exposure data for phenotype 'outcome. Just keeping the first instance:
rs145226353
rs116080943
rs79346640
rs5772990
rs145411679
rs142033339
rs150218993
rs59254935
rs58682971
rs113702600
rs114898411
rs1889702
rs11338667
rs76955023
rs140270114
rs80204938
rs1172532657
rs6703322
rs151105246
rs115847127
rs75815237
rs17419922
rs12078034
rs7553955
rs12040457
rs116379201
rs74713565
rs74854435
rs17544210
rs10465606
rs16838089
rs576963479
rs146567885
rs143857075
rs35741345
rs76302207
rs2609478
rs12029935
rs28535260
rs111961225
rs12079573
rs3767296
rs17188402
rs539821297
rs11581625
rs72431984
rs138610483
rs369158927
rs149604003
rs116532476
rs28715399
rs12081130
rs35380268
rs376075070
rs116683227
rs76439953
rs74765817
rs75813712
rs1345612268
rs371690583
rs12027758
rs7527025
rs116189815
rs75337743
rs72783268
rs56252443
rs117500436
rs6713932
rs57358917
rs163532
rs76224375
rs71423929
rs71407542
rs193249100
rs116755276
rs10490245
rs79656435
rs59532310
rs79491842
rs72827351
r [... truncated]
Harmonising Multiple sclerosis || id:ebi-a-GCST005531 (ebi-a-GCST005531) and outcome (cYXqbz)
Analysing 'ebi-a-GCST005531' on 'cYXqbz'
Warning message:
Ignoring unknown aesthetics: text
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Harmonising Type 1 diabetes || id:ebi-a-GCST005536 (ebi-a-GCST005536) and outcome (cYXqbz)
Analysing 'ebi-a-GCST005536' on 'cYXqbz'
Warning message:
In .fun(piece, ...) :
Duplicated SNPs present in exposure data for phenotype 'Type 1 diabetes || id:ebi-a-GCST005536. Just keeping the first instance:
rs12927355
Warning message:
Ignoring unknown aesthetics: text
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Harmonising Treatment/medication code: levothyroxine sodium || id:ukb-a-190 (ukb-a-190) and outcome (cYXqbz)
Analysing 'ukb-a-190' on 'cYXqbz'
Warning message:
Ignoring unknown aesthetics: text
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Harmonising Non-cancer illness code self-reported: hypothyroidism/myxoedema || id:ukb-a-77 (ukb-a-77) and outcome (cYXqbz)
Removing the following SNPs for being palindromic with intermediate allele frequencies:
rs2921053
Analysing 'ukb-a-77' on 'cYXqbz'
Warning message:
Ignoring unknown aesthetics: text
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
Warning messages:
1: Removed 1 rows containing missing values (geom_errorbarh).
2: Removed 1 rows containing missing values (geom_point).
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/MG.ebi-a-GCST002318.radial.ivw.csv",sep=",")
res
| coef | Estimate | Std.Error | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| 0 | Effect_2nd | 0.225142 | 0.061870 | 3.638946 | 2.737562e-04 |
| 1 | Iterative | 0.245390 | 0.065649 | 3.737916 | 1.855520e-04 |
| 2 | Exact_FE | 0.253004 | 0.038182 | 6.626350 | 3.440899e-11 |
| 3 | Exact_RE | 0.248095 | 0.087340 | 2.840562 | 6.414810e-03 |
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/MG.ebi-a-GCST002318.radial.egger.csv",sep=",")
res
| coef | Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| 0 | Intercept | 0.220935 | 0.454362 | 0.486253 | 0.628871 |
| 1 | Wj | 0.170806 | 0.127953 | 1.334909 | 0.187835 |
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST002318.Scatter_plot.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST002318.Funnel_plot.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST002318.Forest_plot.singleSNP.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST002318.Forest_plot.LOO.jpeg', width = 1000, height = 800)
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/MG.ebi-a-GCST005531.radial.ivw.csv",sep=",")
res
| coef | Estimate | Std.Error | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| 0 | Effect_2nd | 0.142469 | 0.040207 | 3.543378 | 0.000395 |
| 1 | Iterative | 0.145761 | 0.040858 | 3.567495 | 0.000360 |
| 2 | Exact_FE | 0.147890 | 0.037740 | 3.918597 | 0.000089 |
| 3 | Exact_RE | 0.147574 | 0.046107 | 3.200720 | 0.002612 |
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/MG.ebi-a-GCST005531.radial.egger.csv",sep=",")
res
| coef | Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| 0 | Intercept | -0.287044 | 0.328511 | -0.873774 | 0.387331 |
| 1 | Wj | 0.204713 | 0.081856 | 2.500909 | 0.016476 |
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST005531.Scatter_plot.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST005531.Funnel_plot.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST005531.Forest_plot.singleSNP.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST005531.Forest_plot.LOO.jpeg', width = 1000, height = 800)
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/MG.ebi-a-GCST005536.radial.ivw.csv",sep=",")
res
| coef | Estimate | Std.Error | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| 0 | Effect_2nd | 0.284069 | 0.062408 | 4.551820 | 5.318378e-06 |
| 1 | Iterative | 0.300696 | 0.066491 | 4.522360 | 6.115404e-06 |
| 2 | Exact_FE | 0.310528 | 0.036315 | 8.551015 | 1.220108e-17 |
| 3 | Exact_RE | 0.303267 | 0.080481 | 3.768178 | 7.478791e-04 |
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/MG.ebi-a-GCST005536.radial.egger.csv",sep=",")
res
| coef | Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| 0 | Intercept | 0.192706 | 0.569627 | 0.338303 | 0.737659 |
| 1 | Wj | 0.251818 | 0.114482 | 2.199634 | 0.036254 |
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST005536.Scatter_plot.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST005536.Funnel_plot.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST005536.Forest_plot.singleSNP.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ebi-a-GCST005536.Forest_plot.LOO.jpeg', width = 1000, height = 800)
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/MG.ukb-a-77.radial.ivw.csv",sep=",")
res
| coef | Estimate | Std.Error | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| 0 | Effect_2nd | 8.315636 | 1.562361 | 5.322480 | 1.023620e-07 |
| 1 | Iterative | 9.027182 | 1.617545 | 5.580792 | 2.394261e-08 |
| 2 | Exact_FE | 1.999940 | 1.025252 | 1.950681 | 5.109505e-02 |
| 3 | Exact_RE | 9.160281 | 2.199050 | 4.165563 | 9.085086e-05 |
import numpy as np
import pandas as pd
res = pd.read_csv("MR_Base/Results/MG.ukb-a-77.radial.egger.csv",sep=",")
res
| coef | Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| 0 | Intercept | 0.018293 | 0.497725 | 0.036753 | 0.970793 |
| 1 | Wj | 8.168332 | 4.305958 | 1.896984 | 0.062207 |
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ukb-a-77.Scatter_plot.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ukb-a-77.Funnel_plot.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ukb-a-77.Forest_plot.singleSNP.jpeg', width = 1000, height = 800)
from IPython.display import Image
Image(filename='MR_Base/Plots/MG.ukb-a-77.Forest_plot.LOO.jpeg', width = 1000, height = 800)
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results
module load R/4.0.3
R --vanilla --no-save
# Load libraries
library(data.table)
library(tidyverse)
library(dplyr)
#open clean ukb-a-ALL file witht the results
ukbresults = fread("RESULTS.ukb-a.txt", header = T)
ebiresults = fread("RESULTS.ebi-a.txt", header = T)
binded = rbind(ukbresults,ebiresults, fill=TRUE)
write.table(binded, "MR.SignificantResults.tab", quote = F, row.names = F, sep = "/t")
ukbple = fread("PLEITROPY.ukb-a.txt", header = T)
ebiple = fread("PLEITROPY.ebi-a.txt", header = T)
binded = rbind(ukbple,ebiple, fill=TRUE)
write.table(binded, "MR.SignificantPleiotropy.tab", quote = F, row.names = F, sep = "/t")
ukbhet = fread("HETEROGENEITY.ukb-a.txt", header = T)
ebihet = fread("HETEROGENEITY.ebi-a.txt", header = T)
binded = rbind(ukbhet,ebihet, fill=TRUE)
write.table(binded, "MR.SignificantHeterogeneity.tab",quote = F, row.names = F, sep = "/t")
R version 4.0.3 (2020-10-10) -- "Bunny-Wunnies Freak Out"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> # Load libraries
> library(data.table)
> library(tidyverse)
> library(dplyr)
> #open clean ukb-a-ALL file witht the results
> ukbresults = fread("RESULTS.ukb-a.txt", header = T)
> ebiresults = fread("RESULTS.ebi-a.txt", header = T)
> binded = rbind(ukbresults,ebiresults, fill=TRUE)
> write.table(binded, "MR.SignificantResults.tab", quote = F, row.names = F, sep = "/t")
>
> ukbple = fread("PLEITROPY.ukb-a.txt", header = T)
> ebiple = fread("PLEITROPY.ebi-a.txt", header = T)
> binded = rbind(ukbple,ebiple, fill=TRUE)
> write.table(binded, "MR.SignificantPleiotropy.tab", quote = F, row.names = F, sep = "/t")
>
>
> ukbhet = fread("HETEROGENEITY.ukb-a.txt", header = T)
> ebihet = fread("HETEROGENEITY.ebi-a.txt", header = T)
> binded = rbind(ukbhet,ebihet, fill=TRUE)
> write.table(binded, "MR.SignificantHeterogeneity.tab",quote = F, row.names = F, sep = "/t")
>
>
[-] Unloading gcc 9.2.0 ... [-] Unloading GSL 2.6 for GCC 9.2.0 ... [-] Unloading openmpi 3.1.4 for GCC 9.2.0 [-] Unloading ImageMagick 7.0.8 on cn1003 [-] Unloading HDF5 1.10.4 [-] Unloading NetCDF 4.7.4_gcc9.2.0 [-] Unloading pandoc 2.11.4 on cn1003 [-] Unloading pcre2 10.21 ... [-] Unloading R 4.0.3 [+] Loading gcc 9.2.0 ... [+] Loading GSL 2.6 for GCC 9.2.0 ... [-] Unloading gcc 9.2.0 ... [+] Loading gcc 9.2.0 ... [+] Loading openmpi 3.1.4 for GCC 9.2.0 [+] Loading ImageMagick 7.0.8 on cn1003 [+] Loading HDF5 1.10.4 [-] Unloading gcc 9.2.0 ... [+] Loading gcc 9.2.0 ... [+] Loading NetCDF 4.7.4_gcc9.2.0 [+] Loading pandoc 2.11.4 on cn1003 [+] Loading pcre2 10.21 ... [+] Loading R 4.0.3 The following have been reloaded with a version change: 1) R/4.0 => R/4.0.3 ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ── ✔ ggplot2 3.3.3 ✔ purrr 0.3.4 ✔ tibble 3.1.0 ✔ dplyr 1.0.5 ✔ tidyr 1.1.3 ✔ stringr 1.4.0 ✔ readr 1.4.0 ✔ forcats 0.5.1 ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::between() masks data.table::between() ✖ dplyr::filter() masks stats::filter() ✖ dplyr::first() masks data.table::first() ✖ dplyr::lag() masks stats::lag() ✖ dplyr::last() masks data.table::last() ✖ purrr::transpose() masks data.table::transpose()
%%bash
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results
module load R/4.0.3
R --vanilla --no-save
# Load libraries
library(data.table)
library(tidyverse)
library(dplyr)
#open clean ukb-a-ALL file witht the results
GCST002318 = read.csv("MG.ebi-a-GCST002318.radial.ivw.csv", header = T)
dat = filter(GCST002318, coef == "Effect_2nd")
dat$pvalue = dat$Pr...t..
dat$Trait = "ebi-a-GCST002318"
dat1 = select(dat, Trait, Estimate, Std.Error, pvalue)
head(dat1)
GCST005531 = read.csv("MG.ebi-a-GCST005531.radial.ivw.csv", header = T)
dat = filter(GCST005531, coef == "Effect_2nd")
dat$pvalue = dat$Pr...t..
dat$Trait = "ebi-a-GCST005531"
dat2 = select(dat, Trait, Estimate, Std.Error, pvalue)
head(dat2)
ukba77 = read.csv("MG.ukb-a-77.radial.ivw.csv", header = T)
dat = filter(ukba77, coef == "Effect_2nd")
dat$pvalue = dat$Pr...t..
dat$Trait = "ukb-a-77"
dat3 = select(dat, Trait, Estimate, Std.Error, pvalue)
head(dat3)
GCST005536 = read.csv("MG.ebi-a-GCST005536.radial.ivw.csv", header = T)
dat = filter(GCST005536, coef == "Effect_2nd")
dat$pvalue = dat$Pr...t..
dat$Trait = "ebi-a-GCST005536"
dat4 = select(dat, Trait, Estimate, Std.Error, pvalue)
head(dat4)
Radial = rbind(dat1, dat2, dat3, dat4)
write.table(Radial, "Radial.IVW.Results.tab", quote = F, row.names = F, sep = ",")
ukbresults = fread("RESULTS.ukb-a.txt", header = T)
ebiresults = fread("RESULTS.ebi-a.txt", header = T)
binded = rbind(ukbresults,ebiresults, fill=TRUE)
write.table(binded, "MR.SignificantResults.tab", quote = F, row.names = F, sep = ",")
ukbple = fread("PLEITROPY.ukb-a.txt", header = T)
ebiple = fread("PLEITROPY.ebi-a.txt", header = T)
binded = rbind(ukbple,ebiple, fill=TRUE)
write.table(binded, "MR.SignificantPleiotropy.tab", quote = F, row.names = F, sep = ",")
ukbhet = fread("HETEROGENEITY.ukb-a.txt", header = T)
ebihet = fread("HETEROGENEITY.ebi-a.txt", header = T)
binded = rbind(ukbhet,ebihet, fill=TRUE)
write.table(binded, "MR.SignificantHeterogeneity.tab",quote = F, row.names = F, sep = ",")
R version 4.0.3 (2020-10-10) -- "Bunny-Wunnies Freak Out"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
Natural language support but running in an English locale
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> # Load libraries
> library(data.table)
> library(tidyverse)
> library(dplyr)
> #open clean ukb-a-ALL file witht the results
> GCST002318 = read.csv("MG.ebi-a-GCST002318.radial.ivw.csv", header = T)
> dat = filter(GCST002318, coef == "Effect_2nd")
> dat$pvalue = dat$Pr...t..
> dat$Trait = "ebi-a-GCST002318"
> dat1 = select(dat, Trait, Estimate, Std.Error, pvalue)
> head(dat1)
Trait Estimate Std.Error pvalue
1 ebi-a-GCST002318 0.2251425 0.06187024 0.0002737562
>
>
> GCST005531 = read.csv("MG.ebi-a-GCST005531.radial.ivw.csv", header = T)
> dat = filter(GCST005531, coef == "Effect_2nd")
> dat$pvalue = dat$Pr...t..
> dat$Trait = "ebi-a-GCST005531"
> dat2 = select(dat, Trait, Estimate, Std.Error, pvalue)
> head(dat2)
Trait Estimate Std.Error pvalue
1 ebi-a-GCST005531 0.1424691 0.04020715 0.0003950365
>
>
> ukba77 = read.csv("MG.ukb-a-77.radial.ivw.csv", header = T)
> dat = filter(ukba77, coef == "Effect_2nd")
> dat$pvalue = dat$Pr...t..
> dat$Trait = "ukb-a-77"
> dat3 = select(dat, Trait, Estimate, Std.Error, pvalue)
> head(dat3)
Trait Estimate Std.Error pvalue
1 ukb-a-77 8.315636 1.562361 1.02362e-07
>
> GCST005536 = read.csv("MG.ebi-a-GCST005536.radial.ivw.csv", header = T)
> dat = filter(GCST005536, coef == "Effect_2nd")
> dat$pvalue = dat$Pr...t..
> dat$Trait = "ebi-a-GCST005536"
> dat4 = select(dat, Trait, Estimate, Std.Error, pvalue)
> head(dat4)
Trait Estimate Std.Error pvalue
1 ebi-a-GCST005536 0.2840693 0.06240785 5.318378e-06
>
> Radial = rbind(dat1, dat2, dat3, dat4)
> write.table(Radial, "Radial.IVW.Results.tab", quote = F, row.names = F, sep = ",")
>
> ukbresults = fread("RESULTS.ukb-a.txt", header = T)
> ebiresults = fread("RESULTS.ebi-a.txt", header = T)
> binded = rbind(ukbresults,ebiresults, fill=TRUE)
> write.table(binded, "MR.SignificantResults.tab", quote = F, row.names = F, sep = ",")
>
> ukbple = fread("PLEITROPY.ukb-a.txt", header = T)
> ebiple = fread("PLEITROPY.ebi-a.txt", header = T)
> binded = rbind(ukbple,ebiple, fill=TRUE)
> write.table(binded, "MR.SignificantPleiotropy.tab", quote = F, row.names = F, sep = ",")
>
>
> ukbhet = fread("HETEROGENEITY.ukb-a.txt", header = T)
> ebihet = fread("HETEROGENEITY.ebi-a.txt", header = T)
> binded = rbind(ukbhet,ebihet, fill=TRUE)
> write.table(binded, "MR.SignificantHeterogeneity.tab",quote = F, row.names = F, sep = ",")
>
>
[-] Unloading gcc 9.2.0 ... [-] Unloading GSL 2.6 for GCC 9.2.0 ... [-] Unloading openmpi 3.1.4 for GCC 9.2.0 [-] Unloading ImageMagick 7.0.8 on cn1040 [-] Unloading HDF5 1.10.4 [-] Unloading NetCDF 4.7.4_gcc9.2.0 [-] Unloading pandoc 2.11.4 on cn1040 [-] Unloading pcre2 10.21 ... [-] Unloading R 4.0.3 [+] Loading gcc 9.2.0 ... [+] Loading GSL 2.6 for GCC 9.2.0 ... [-] Unloading gcc 9.2.0 ... [+] Loading gcc 9.2.0 ... [+] Loading openmpi 3.1.4 for GCC 9.2.0 [+] Loading ImageMagick 7.0.8 on cn1040 [+] Loading HDF5 1.10.4 [-] Unloading gcc 9.2.0 ... [+] Loading gcc 9.2.0 ... [+] Loading NetCDF 4.7.4_gcc9.2.0 [+] Loading pandoc 2.11.4 on cn1040 [+] Loading pcre2 10.21 ... [+] Loading R 4.0.3 The following have been reloaded with a version change: 1) R/4.0 => R/4.0.3 ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ── ✔ ggplot2 3.3.3 ✔ purrr 0.3.4 ✔ tibble 3.1.0 ✔ dplyr 1.0.5 ✔ tidyr 1.1.3 ✔ stringr 1.4.0 ✔ readr 1.4.0 ✔ forcats 0.5.1 ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::between() masks data.table::between() ✖ dplyr::filter() masks stats::filter() ✖ dplyr::first() masks data.table::first() ✖ dplyr::lag() masks stats::lag() ✖ dplyr::last() masks data.table::last() ✖ purrr::transpose() masks data.table::transpose()
SIDE NOTE: One of the significant traits is: ukb-a-190, Treatment/medication code: levothyroxine sodium. This medication is used to trait ukb-a-77 so we do NOT need a follow-up for ukb-a-190
%%bash
#cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/Ruth_MGpaper
#mkdir MendelianRandomization
cd /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/MR_Base/Results
scp MR.SignificantResults.tab /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/Ruth_MGpaper/MendelianRandomization
scp MR.SignificantPleiotropy.tab /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/Ruth_MGpaper/MendelianRandomization
scp MR.SignificantHeterogeneity.tab /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/Ruth_MGpaper/MendelianRandomization
scp Radial.IVW.Results.tab /data/ALS_50k/SaraSaez_ALS/2021-03-18.COLABORATION_MG/Ruth_MGpaper/MendelianRandomization